Nothing
#--last updated 1/7/2025 11:24 AM
.onAttach <- function(libname, pkgname) {
packageStartupMessage("
========================================
PCAPAM50 version 1.0.3
Package: https://cran.r-project.org/package=PCAPAM50
Documentation: https://www.wriwindber.org/tools-portal/pcapam50/
If you use it in published research, please cite:
PCAPAM50: Raj-Kumar PK, et al.Scientific reports. 2019 May 28;9(1):7956.
PAM50: Parker JS, et al. Journal of clinical oncology. 2009 Mar 3;27(8):1160.
This message can be suppressed by:
suppressPackageStartupMessages(library(PCAPAM50))
========================================
")
}
######
##
my.plotPCA = function(x, intgroup, ablne = 0,colours = c("red","hotpink","darkblue","lightblue","red3","hotpink3","royalblue3","lightskyblue3"),LINE.V=TRUE)
{
# Assuming 'exprs' is correctly defined or imported; using Biobase::exprs explicitly if from Biobase package
fac = factor(intgroup)
if (nlevels(fac) == 0) {
stop("intgroup has no valid levels. Please check your 'intgroup' data.")
}
# Check if number of colors matches number of factor levels in intgroup
if (length(colours) < nlevels(fac)) {
stop("Please provide enough colors to match the number of levels in the intgroup factor.")
}
# Perform PCA
pca = prcomp(t(Biobase::exprs(x)))
# Define key text based on factor levels
key_text <- levels(fac)
# Generate the PCA plot using lattice::xyplot
pcafig = lattice::xyplot(PC2 ~ PC1, groups = fac, data = as.data.frame(pca$x), xlab = "PC1",
panel = function(x, y, ...) {
lattice::panel.xyplot(x, y, ...)
if (LINE.V) {
lattice::panel.abline(v = ablne, lty = 3, lwd = 1.5)
}
},
key = list(space = "right",
text = list(labels = levels(fac)),
points = list(pch = 16, cex = 2, col = colours),
columns = min(nlevels(fac), 2)),
pch = 16,
cex = 2,
aspect = "iso",
col = colours)
return(pcafig)
}
#### function to form a ER-balance subset and derive its median---write it to PAM50 dir
makeCalls.ihc = function(df.cln, seed=118, mat, outDir=NULL){
#message("###clinical subtype data.frame should have a column --PatientID-- with which mat cols are also named")
#message("##IHC subtype column should be named ---IHC---")
if (is.null(outDir)) {
outDir <- file.path(tempdir(), "Calls.PAM50")
}
message("outDir: ", outDir)
# Check and create directory if it doesn't exist
if (!dir.exists(outDir)) {
dir.create(outDir, recursive = TRUE)
message("Directory created: ", outDir)
} else {
message("Directory already exists: ", outDir)
}
# Initial checks for 'df.cln' and 'mat'
if (is.null(df.cln) || !'PatientID' %in% colnames(df.cln) || !'IHC' %in% colnames(df.cln)) {
stop("Clinical data 'df.cln' is missing or does not contain required columns 'PatientID' and 'IHC'. Refer to the vignette to create 'test.clinical' data.")
}
if (is.null(mat) || !is.matrix(mat)) {
stop("Gene expression matrix 'mat' is missing or not correctly formatted. Refer to the vignette to create 'test.matrix' data.")
}
# Continue with function if checks pass
message("All necessary input data are correctly formatted and present. Grab a cup of coffee while we do the heavy lifting for you!")
# Save the current working directory and ensure it is reset upon function exit
oldwd <- getwd()
on.exit(setwd(oldwd))
# Save the current par settings
#oldpar <- par(no.readonly = TRUE)
#oldpar$new <- NULL # Remove the problematic 'new' parameter
# Ensure the par settings are reset when the function exits
#on.exit(par(oldpar))
#becuase this creates a empty plot I am doing the following
if (!is.null(dev.list())) {
# Save the current graphical parameters
oldpar <- par(no.readonly = TRUE)
# Ensure the graphical parameters are restored on function exit
on.exit(par(oldpar))
} else {
message("No active graphics device; skipping par save.")
}
#--Prepare the required input parameters for PAM50 method
predFiles = file.path(outDir, "Input_pam50genes_matrix.txt")#provided inputFile for predictions
short = "Ihc.Mdns"# short name that will be used for output files
calibrationParameters = "Mdns.PAM50"# suffix for naming intrinsic subtype column in o/p
Out.mdns.fl = "Mdns.PAM50.txt"## where new medians will be written
calibrationFile = file.path(outDir, Out.mdns.fl)
hasClinical = F #--PCAPAM50 donot use clinical info module of PAM50 classifier
# Write the Test.matrix to the file
write.table(mat, predFiles, sep = "\t", col.names = NA)
# Verify file creation
if (!file.exists(predFiles)) {
stop("File not created: ", predFiles)
}
# Define parameters
paramDir <- system.file( "PAM50/bioclassifier_R", package = "PCAPAM50")#"extdata",
if (!file.exists(paramDir)) {
stop("Parameter directory not found: ", paramDir)
}
# Print paths for debugging
#message("paramater Dir path: ", paramDir, "\n")
#message("Input File path: ", predFiles, "\n")
#message("Output Median File path: ", calibrationFile, "\n")
# Convert IHC column to uppercase to handle case insensitivity
df.cln$IHC = toupper(df.cln$IHC)
# Get ER- samples data.frame by excluding those starting with "L"
ERN.ihc = df.cln[!grepl("^L", df.cln$IHC), ]
#ERN.ihc = df.cln[which(df.cln$IHC %in% c("TN","Her2+")),] ### get ER- samples data.frame
dim(ERN.ihc) #[1] 153 9
# Get ER+ samples data.frame (subtypes starting with "L")
ERP.ihc <- df.cln[grepl("^L", df.cln$IHC), ]
#ERP.ihc = df.cln[which(df.cln$IHC %in% c("LA","LB1","LB2")),]
dim(ERP.ihc) #[1] 559 9
# Determine the smaller size between ER+ and ER-
sample_size <- min(dim(ERN.ihc)[1], dim(ERP.ihc)[1])
# Sample equal number of ER+ and ER- samples
set.seed(seed) # Set seed for reproducibility
if (dim(ERN.ihc)[1] <= dim(ERP.ihc)[1]) {
i <- sample(dim(ERP.ihc)[1], sample_size)
ERP.ihc_sampled <- ERP.ihc[i, ]
ERN.ihc_sampled <- ERN.ihc
} else {
i <- sample(dim(ERN.ihc)[1], sample_size)
ERN.ihc_sampled <- ERN.ihc[i, ]
ERP.ihc_sampled <- ERP.ihc
}
# Subset PAM50 matrix with the IDs corresponding to balanced ER+ and ER-
mbal.ihc <- mat[, c(ERP.ihc_sampled$PatientID, ERN.ihc_sampled$PatientID)]
#set.seed(seed);i = sample(dim(ERP.ihc)[1],dim(ERN.ihc)[1]) # take equal number of ER+ and ER- samples
#length(ERP.ihc$PatientID[i]) # ER positive samples
#length(ERN.ihc$PatientID) # ER negative samples
######=== subset pam50 matrix with the IDs corresponding to balanced ER+ and ER-
#mbal.ihc = mat[,c(ERP.ihc$PatientID[i],ERN.ihc$PatientID)]
#dim(mbal.ihc) #[1] 50 306
# Find median
mdns = apply(mbal.ihc,1,median,na.rm=T) # compute median of each row i.e gene
mdns.df = as.data.frame(mdns)
df.mdns = data.frame(X=rownames(mdns.df),mdns.ihc=mdns.df$mdns) # ER-blanced set based on IHC status alone---
colnames(df.mdns) = c("X",calibrationParameters)
# merge this median to PAM50-medians file "mediansPerDataset_v2.txt"
fl.nm = paste(paramDir,"mediansPerDataset_v2.txt",sep="/")
#message("Reading file: ", fl.nm)
if (!file.exists(fl.nm)) {
stop("File not found: ", fl.nm)
}
fl.mdn = read.table(fl.nm,sep="\t",header=T,stringsAsFactors=F)
df.al = merge(fl.mdn,df.mdns,by="X")
#save(df.al,file="./Rdat/mediansPerDataset_15Nov17.Rdat")
write.table(df.al, file=calibrationFile, sep="\t", col.names=T,row.names=F,quote=F)
####
# run the assignment algorithm
# all the global variable required for assignment algorithm are set outside of function where it is been called
####
#message("Sourcing: ", paste(paramDir, "subtypePrediction_distributed_PNmodified.R", sep = "/"))
#message("Sourcing: ", paste(paramDir, "subtypePrediction_functions_PNmodified.R", sep = "/"))
source(paste(paramDir,"subtypePrediction_functions_PNmodified.R",sep="/"))
source(paste(paramDir,"subtypePrediction_distributed_PNmodified.R",sep="/"))
# Call the modified function
res = subtypePrediction_distributed(paramDir, outDir, short, predFiles, hasClinical, calibrationFile, calibrationParameters)
# Verify the expected output file paths
ot.fl = paste(outDir, "/", short, "_pam50scores.txt", sep = "")
message("Expected output file: ", ot.fl)
kl = read.table(ot.fl,sep="\t",header=T,stringsAsFactors=F)#note md -- for median centered
df.kl = data.frame(PatientID=kl$X,Int.SBS.ihcMd=kl$Call,stringsAsFactors=FALSE)
colnames(df.kl) = c("PatientID",paste("Int.SBS",calibrationParameters,sep="."))
df.kl = merge(df.kl,df.cln,by="PatientID")
return(list(Int.sbs=df.kl, score.fl=kl, mdns.fl=df.al, SBS.colr = res$subtypeColors, outList=res$out))
}
#### function to form a ER-balance subet and derive its median---write it to PAM50 dir
makeCalls.PC1ihc = function(df.cln, seed=118, mat, outDir=NULL){
#message("###clinical subtype data.frame should have a column --PatientID-- with which mat cols are also named")
#message("##IHC subtype column should be named ---IHC---")
# Set the default outDir to a temporary directory if not provided
if (is.null(outDir)) {
outDir <- file.path(tempdir(), "Calls.PC1ihc")
}
message("outDir: ", outDir)
# Check and create directory if it doesn't exist
if (!dir.exists(outDir)) {
dir.create(outDir, recursive = TRUE)
message("Directory created: ", outDir)
} else {
message("Directory already exists: ", outDir)
}
# Initial checks for 'df.cln' and 'mat'
if (is.null(df.cln) || !'PatientID' %in% colnames(df.cln) || !'IHC' %in% colnames(df.cln)) {
stop("Clinical data 'df.cln' is missing or does not contain required columns 'PatientID' and 'IHC'. Refer to the vignette to create 'test.clinical' data.")
}
if (is.null(mat) || !is.matrix(mat)) {
stop("Gene expression matrix 'mat' is missing or not correctly formatted. Refer to the vignette to create 'test.matrix' data.")
}
# Continue with function if checks pass
message("All necessary input data are correctly formatted and present. Grab a cup of coffee while we do the heavy lifting for you!")
# Save the current working directory and ensure it is reset upon function exit
oldwd <- getwd()
on.exit(setwd(oldwd))
# Save the current par settings
#oldpar <- par(no.readonly = TRUE)
#oldpar$new <- NULL # Remove the problematic 'new' parameter
# Ensure the par settings are reset when the function exits
#on.exit(par(oldpar))
#becuase this creates a empty plot I am doing the following
if (!is.null(dev.list())) {
# Save the current graphical parameters
oldpar <- par(no.readonly = TRUE)
# Ensure the graphical parameters are restored on function exit
on.exit(par(oldpar))
} else {
message("No active graphics device; skipping par save.")
}
#--Prepare the required input parameters for PAM50 method
predFiles = file.path(outDir, "Input_pam50genes_matrix.txt")#provided inputFile for predictions
short = "PC1ihc.Mdns"# short name that will be used for output files
calibrationParameters = "Mdns.PC1ihcPAM50"# suffix for naming intrinsic subtype column in o/p
Out.mdns.fl = "Mdns.PC1ihcPAM50.txt"## where new medians will be written
calibrationFile = file.path(outDir, Out.mdns.fl)
hasClinical = F #--PCAPAM50 donot use clinical info module of PAM50 classifier
# Write the Test.matrix to the file
write.table(mat, predFiles, sep = "\t", col.names = NA)
# Verify file creation
if (!file.exists(predFiles)) {
stop("File not created: ", predFiles)
}
# Define parameters
paramDir <- system.file( "PAM50/bioclassifier_R", package = "PCAPAM50")#"extdata",
if (!file.exists(paramDir)) {
stop("Parameter directory not found: ", paramDir)
}
# Print paths for debugging
#message("paramater Dir path: ", paramDir, "\n")
#message("Input File path: ", predFiles, "\n")
#message("Output Median File path: ", calibrationFile, "\n")
# Pull the PCA components
#rv = rowVars(mat)
#select = order(rv, decreasing = TRUE)[seq_len(dim(mat)[1])] # the input is PAM50 matrix --50 genes -- get from dimension
pca = prcomp(t(mat))#[select,]
pc12 = pca$x[,1:2] #get two principal
df.pc1 = data.frame(PatientID=rownames(pc12),PC1 = pc12[,1],stringsAsFactors=F)
df.pca1 = merge(df.cln,df.pc1,by="PatientID")
#--our function works best if majority of ER- cases fall in the positive PC1 axis--check
# Identify ER-negative cases
er_negative <- !grepl("^L", df.pca1$IHC)
# Determine if the majority of ER-negative cases fall in the negative axis of PC1
if (sum(df.pca1$PC1[er_negative] < 0) > sum(df.pca1$PC1[er_negative] >= 0)) {
#print("yes")
df.pca1$PC1 <- -df.pca1$PC1
}
# Ensure that IHC is not a factor or has all necessary levels defined
if (is.factor(df.pca1$IHC)) {
df.pca1$IHC = as.character(df.pca1$IHC)
}
# Convert IHC column to uppercase to handle case insensitivity
df.pca1$IHC <- toupper(df.pca1$IHC)
# Function to count the number of misclassified cases on a given PC1 point ---find the cutoff
getno = function(x) {
p.rgt = length(which(grepl("^L", df.pca1$IHC) & df.pca1$PC1 > x)) / length(which(grepl("^L", df.pca1$IHC)))
n.lft = length(which(!grepl("^L", df.pca1$IHC) & df.pca1$PC1 < x)) / length(which(!grepl("^L", df.pca1$IHC)))
tot = (p.rgt + n.lft) * 100
return(list(PC1 = x, Mis = tot))
}
### function to count the number of mis-classified cases on a given PC1 point ---find the cutoff
#getno = function(x){
# p.rgt = length(which(df.pca1$IHC %in% c("LB1","LB2","LA") & df.pca1$PC1 >x))/length(which(df.pca1$IHC %in% c("LB1","LB2","LA") ))
# n.lft = length(which(df.pca1$IHC %in% c("Her2+","TN") & df.pca1$PC1 <x))/ length(which(df.pca1$IHC %in% c("Her2+","TN")))
# tot = (p.rgt + n.lft) * 100
# return(list(PC1=x,Mis=tot))
#}
df.mis = do.call(rbind.data.frame,lapply(seq(-20,20,by=0.1),getno))
num.min = df.mis$PC1[which(df.mis$Mis == min(df.mis$Mis))]
plt.fl = paste(outDir,"PC1_discordance_cases.png",sep="/")
png(filename = plt.fl, width = 1400, height = 1500, units = "px", pointsize = 10, bg = "white", type = c("cairo"),res=200)
par(mar=c(5,5,1,1))
plot(x=df.mis$PC1, y=df.mis$Mis,xlab="PC1",ylab="% of discordance cases",type="l",lwd=2,col="red",lty=1,cex.axis=2,cex.lab=2)
abline(v=num.min, lty=3,lwd=3)
dev.off()
#ERP.pc1ihc = df.pca1[which(df.pca1$IHC %in% c("LB1","LB2","LA") & df.pca1$PC1 <= mean(num.min)),] # used mean to overcome situation where there are two minimum
#ERN.pc1ihc = df.pca1[which(df.pca1$IHC %in% c("Her2+","TN") & df.pca1$PC1 > mean(num.min)),]
ERP.pc1ihc = df.pca1[which(grepl("^L", df.pca1$IHC) & df.pca1$PC1 <= mean(num.min)), ] # used mean to overcome situation where there are two minimum
ERN.pc1ihc = df.pca1[which(!grepl("^L", df.pca1$IHC) & df.pca1$PC1 > mean(num.min)), ]
dim(ERP.pc1ihc)# 543 3
dim(ERN.pc1ihc)# 118 3
# Determine the smaller size between ER+ and ER-
sample_size <- min(dim(ERP.pc1ihc)[1], dim(ERN.pc1ihc)[1])
# Sample equal number of ER+ and ER- samples
set.seed(seed) # Set seed for reproducibility
if (dim(ERN.pc1ihc)[1] <= dim(ERP.pc1ihc)[1]) {
i <- sample(dim(ERP.pc1ihc)[1], sample_size)
ERP.pc1ihc_sampled <- ERP.pc1ihc[i, ]
ERN.pc1ihc_sampled <- ERN.pc1ihc
} else {
i <- sample(dim(ERN.pc1ihc)[1], sample_size)
ERN.pc1ihc_sampled <- ERN.pc1ihc[i, ]
ERP.pc1ihc_sampled <- ERP.pc1ihc
}
# Subset PAM50 matrix with the IDs corresponding to balanced ER+ and ER-
mbal.pc1ihc <- mat[, c(ERP.pc1ihc_sampled$PatientID, ERN.pc1ihc_sampled$PatientID)]
#set.seed(seed);i = sample(dim(ERP.pc1ihc)[1],dim(ERN.pc1ihc)[1]) # take equal number of ER+ and ER- samples
#length(ERP.pc1ihc$PatientID[i]) # ER positive samples
#length(ERN.pc1ihc$PatientID) # ER negative samples
######=== subset pam50 matrix with the IDs corresponding to balanced ER+ and ER-
#mbal.pc1ihc = mat[,c(ERP.pc1ihc$PatientID[i],ERN.pc1ihc$PatientID)]
dim(mbal.pc1ihc) #[1] 50 236
# Find median
mdns = apply(mbal.pc1ihc,1,median,na.rm=T) # compute median of each row i.e gene
mdns.df = as.data.frame(mdns)
df.mdns = data.frame(X=rownames(mdns.df),mdns.pc1ihc=mdns.df$mdns) # ER-blanced set based on IHC status alone---
colnames(df.mdns) = c("X",calibrationParameters)
# merge this median to PAM50-medians file "mediansPerDataset_v2.txt"
fl.nm = paste(paramDir,"mediansPerDataset_v2.txt",sep="/")
#message("Reading file: ", fl.nm)
if (!file.exists(fl.nm)) {
stop("File not found: ", fl.nm)
}
fl.mdn = read.table(fl.nm,sep="\t",header=T,stringsAsFactors=F)
df.al = merge(fl.mdn,df.mdns,by="X")
#save(df.al,file="./Rdat/mediansPerDataset_15Nov17.Rdat")
write.table(df.al, file=calibrationFile, sep="\t", col.names=T,row.names=F,quote=F)
####
# run the assignment algorithm
####
#message("Sourcing: ", paste(paramDir, "subtypePrediction_distributed_PNmodified.R", sep = "/"))
#message("Sourcing: ", paste(paramDir, "subtypePrediction_functions_PNmodified.R", sep = "/"))
source(paste(paramDir,"subtypePrediction_functions_PNmodified.R",sep="/"))
source(paste(paramDir,"subtypePrediction_distributed_PNmodified.R",sep="/"))
# Call the modified function
res = subtypePrediction_distributed(paramDir, outDir, short, predFiles, hasClinical, calibrationFile, calibrationParameters)
# Verify the expected output file paths
ot.fl = paste(outDir, "/", short, "_pam50scores.txt", sep = "")
message("Expected output file: ", ot.fl)
kl = read.table(ot.fl,sep="\t",header=T,stringsAsFactors=F)#note md -- for median centered
df.kl = data.frame(PatientID=kl$X,Int.SBS.ihcMd=kl$Call,stringsAsFactors=FALSE)
colnames(df.kl) = c("PatientID",paste("Int.SBS",calibrationParameters,sep="."))
df.kl = merge(df.kl,df.cln,by="PatientID")
return(list(Int.sbs=df.kl, score.fl=kl, mdns.fl=df.al, SBS.colr = res$subtypeColors, outList=res$out, PC1cutoff=df.mis, DF.PC1=df.pca1))
}
makeCalls.v1PAM = function(df.pam, seed=118, mat, outDir=NULL){
#message("###df.pam data.frame should have a column --PatientID-- with which mat cols are also named")
#message("##v1PAM subtype column should be named ---PAM50---")
if (is.null(outDir)) {
outDir <- file.path(tempdir(), "Calls.PCAPAM50")
}
message("outDir: ", outDir)
# Check and create directory if it doesn't exist
if (!dir.exists(outDir)) {
dir.create(outDir, recursive = TRUE)
message("Directory created: ", outDir)
} else {
message("Directory already exists: ", outDir)
}
# Initial checks for 'df.cln' and 'mat'
if (is.null(df.pam) || !'PatientID' %in% colnames(df.pam) || !'PAM50' %in% colnames(df.pam)) {
stop("Clinical data 'df.pam' is missing or does not contain required columns 'PatientID' and 'PAM50'. Refer to the vignette to create 'test.intermediate subtype' data.")
}
if (is.null(mat) || !is.matrix(mat)) {
stop("Gene expression matrix 'mat' is missing or not correctly formatted. Refer to the vignette to create 'test.matrix' data.")
}
# Continue with function if checks pass
message("All necessary input data are correctly formatted and present. Grab a cup of coffee while we do the heavy lifting for you!")
# Save the current working directory and ensure it is reset upon function exit
oldwd <- getwd()
on.exit(setwd(oldwd))
# Save the current par settings
#oldpar <- par(no.readonly = TRUE)
#oldpar$new <- NULL # Remove the problematic 'new' parameter
# Ensure the par settings are reset when the function exits
#on.exit(par(oldpar))
#becuase this creates a empty plot I am doing the following
if (!is.null(dev.list())) {
# Save the current graphical parameters
oldpar <- par(no.readonly = TRUE)
# Ensure the graphical parameters are restored on function exit
on.exit(par(oldpar))
} else {
message("No active graphics device; skipping par save.")
}
#--Prepare the required input parameters for PAM50 method
predFiles = file.path(outDir, "Input_pam50genes_matrix.txt")#provided inputFile for predictions
short = "PCAPAM50.Mdns"# short name that will be used for output files
calibrationParameters = "Mdns.PCAPAM50"# suffix for naming intrinsic subtype column in o/p
Out.mdns.fl = "Mdns.PCAPAM50.txt"## where new medians will be written
calibrationFile = file.path(outDir, Out.mdns.fl)
hasClinical = F #--PCAPAM50 donot use clinical info module of PAM50 classifier
# Write the Test.matrix to the file
write.table(mat, predFiles, sep = "\t", col.names = NA)
# Verify file creation
if (!file.exists(predFiles)) {
stop("File not created: ", predFiles)
}
# Define parameters
paramDir <- system.file( "PAM50/bioclassifier_R", package = "PCAPAM50")#"extdata",
if (!file.exists(paramDir)) {
stop("Parameter directory not found: ", paramDir)
}
# Print paths for debugging
#message("paramater Dir path: ", paramDir, "\n")
#message("Input File path: ", predFiles, "\n")
#message("Output Median File path: ", calibrationFile, "\n")
ERN.pam = df.pam[which(df.pam$PAM50 %in% c("Basal")),] ### get ER- samples data.frame
dim(ERN.pam) #[1] 119 2
ERP.pam = df.pam[which(df.pam$PAM50 %in% c("LumA")),]
dim(ERP.pam) #[1] 352 2
# Determine the smaller size between ER+ and ER-
sample_size <- min(dim(ERP.pam)[1], dim(ERN.pam)[1])
# Sample equal number of ER+ and ER- samples
set.seed(seed) # Set seed for reproducibility
if (dim(ERN.pam)[1] <= dim(ERP.pam)[1]) {
i <- sample(dim(ERP.pam)[1], sample_size)
ERP.pam_sampled <- ERP.pam[i, ]
ERN.pam_sampled <- ERN.pam
} else {
i <- sample(dim(ERN.pam)[1], sample_size)
ERN.pam_sampled <- ERN.pam[i, ]
ERP.pam_sampled <- ERP.pam
}
# Subset PAM50 matrix with the IDs corresponding to balanced ER+ and ER-
mbal.pam <- mat[, c(ERP.pam_sampled$PatientID, ERN.pam_sampled$PatientID)]
#set.seed(seed);i = sample(dim(ERP.pam)[1],dim(ERN.pam)[1]) # take equal number of ER+ and ER- samples
#length(ERP.pam$PatientID[i]) # ER positive samples
#length(ERN.pam$PatientID) # ER negative samples
######=== subset pam50 matrix with the IDs corresponding to balanced ER+ and ER-
#mbal.pam = mat[,c(ERP.pam$PatientID[i],ERN.pam$PatientID)]
dim(mbal.pam) #[1] 50 306
# Find median
mdns = apply(mbal.pam,1,median,na.rm=T) # compute median of each row i.e gene
mdns.df = as.data.frame(mdns)
df.mdns = data.frame(X=rownames(mdns.df),mdns.pam=mdns.df$mdns) # ER-blanced set based on IHC status alone---
colnames(df.mdns) = c("X",calibrationParameters)
# merge this median to PAM50-medians file "mediansPerDataset_v2.txt"
fl.nm = paste(paramDir,"mediansPerDataset_v2.txt",sep="/")
#message("Reading file: ", fl.nm)
if (!file.exists(fl.nm)) {
stop("File not found: ", fl.nm)
}
fl.mdn = read.table(fl.nm,sep="\t",header=T,stringsAsFactors=F)
df.al = merge(fl.mdn,df.mdns,by="X")
#save(df.al,file="./Rdat/mediansPerDataset_15Nov17.Rdat")
write.table(df.al, file=calibrationFile, sep="\t", col.names=T,row.names=F,quote=F)#paste(paramDir,mdns.outFile,sep="/")
####
# run the assignment algorithm
# all the global variable required for assignment algorithm are set outside of function where it is been called
####
#message("Sourcing: ", paste(paramDir, "subtypePrediction_distributed_PNmodified.R", sep = "/"))
#message("Sourcing: ", paste(paramDir, "subtypePrediction_functions_PNmodified.R", sep = "/"))
source(paste(paramDir,"subtypePrediction_functions_PNmodified.R",sep="/"))
source(paste(paramDir,"subtypePrediction_distributed_PNmodified.R",sep="/"))
# Call the modified function
res = subtypePrediction_distributed(paramDir, outDir, short, predFiles, hasClinical, calibrationFile, calibrationParameters)
# Verify the expected output file paths
ot.fl = paste(outDir, "/", short, "_pam50scores.txt", sep = "")
message("Expected output file: ", ot.fl)
kl = read.table(ot.fl,sep="\t",header=T,stringsAsFactors=F)#note md -- for median centered
df.kl = data.frame(PatientID=kl$X,Int.SBS.pamMd=kl$Call,stringsAsFactors=FALSE)
colnames(df.kl) = c("PatientID",paste("Int.SBS",calibrationParameters,sep="."))
df.kl = merge(df.kl,df.pam,by="PatientID")
return(list(Int.sbs=df.kl, score.fl=kl, mdns.fl=df.al, SBS.colr = res$subtypeColors, outList=res$out))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.