WISP.getPureCentro = function(data,cl,pureSamples_filtering = TRUE, nb_markers_selection = c("custom", "optim_kappa")[1], nb_markers_max_perClass = 50, markers_cutoff_pval_anovatest = 0.05, markers_pval_anovatest_fdr = TRUE, markers_cutoff_auc = 0.8, pureSamples_deltaTopWeights = 0.5, plot_heatmap = TRUE, add_markers = NULL, col_purePop = NULL, sum_LessThanOne = TRUE){
datatot = data
if(is.null(names(cl))){
names(cl) = colnames(data)
warning("The vector cl of the classes does not contain names, WISP assumes that the samples are in the same order as in the data.")
} else if(all(names(cl) == colnames(data)) == "FALSE"){
stop("The names in the vector cl of the classes do not match the sample names in the data.")
}
cltot = cl
totsample = colnames(data)
ind2keep = 0
if(pureSamples_filtering == TRUE){
resW = data.frame(1:ncol(data))
while(length(ind2keep) != nrow(resW)){
print("Retrieving markers")
resTest = testmarkers(data,cl,markers_cutoff_pval_anovatest, markers_pval_anovatest_fdr)
print("Centroid calculation")
centrores = centroestim(data, cl, resTest = resTest, nb_markers_selection=nb_markers_selection, nb_markers_max_perClass = nb_markers_max_perClass, markers_cutoff_auc = markers_cutoff_auc, add_markers = add_markers,markers_cutoff_pval_anovatest = 0.05, markers_pval_anovatest_fdr = TRUE)
genescentro = centrores$centroid
if(length(setdiff(levels(as.factor(cl)), levels(as.factor(centrores$cl)))>0)){
popweak = setdiff(levels(as.factor(cl)), levels(as.factor(centrores$cl)))
print(paste("Population", popweak, "has no specific markers and has been removed.", sep=" "))
warning(paste("Population", popweak, "has no specific markers and has been removed.", sep=" "))
}
cl = centrores$cl
data = centrores$data
print("Weight estimation")
resW = WISP.getWeight(as.matrix(data[rownames(genescentro),]), as.matrix(genescentro), sum_LessThanOne)
names(cl) = colnames(data)
resW$labelhisto = as.factor(cl[rownames(resW)])
resW=resW[resW$WARNING == "OK",]
ind2keep = rownames(resW)[which(resW[,"topWeightedClass"] == resW[,"labelhisto"] & abs(resW$deltaTopWeights)> pureSamples_deltaTopWeights)]
if(length(setdiff(colnames(data), ind2keep)) == 0){
print(paste("No sample was removed" ,setdiff(colnames(data), ind2keep)))
} else if(length(setdiff(colnames(data), ind2keep)) == 1){
print(paste(length(setdiff(colnames(data), ind2keep)), "sample was removed:" ,setdiff(colnames(data), ind2keep)))
} else {
print(paste(paste(length(setdiff(colnames(data), ind2keep)), "samples were removed:", collapse=" "), paste(setdiff(colnames(data), ind2keep), collapse=" "), collapse=" "))
}
data = data[,ind2keep]
cl = cl[ind2keep]
}
print(paste("Final Centroid calculation (on ", paste(ncol(data), sep=" "), " samples)", sep=""))
resTest = testmarkers(data,cl)
genescentroRes = centroestim(data, cl, resTest = resTest, nb_markers_selection=nb_markers_selection, nb_markers_max_perClass = nb_markers_max_perClass, markers_cutoff_auc = markers_cutoff_auc, add_markers = add_markers)
genescentro = genescentroRes$centroid
} else {
resTest = testmarkers(datatot,cltot,markers_cutoff_pval_anovatest, markers_pval_anovatest_fdr)
genescentroRes = centroestim(data=datatot, cl=cltot, resTest = resTest, nb_markers_selection=nb_markers_selection, nb_markers_max_perClass = nb_markers_max_perClass, markers_cutoff_auc = markers_cutoff_auc, add_markers = add_markers,markers_cutoff_pval_anovatest = 0.05, markers_pval_anovatest_fdr = TRUE)
genescentro = genescentroRes$centroid
ind2keep = colnames(datatot)
}
indremoved = setdiff(colnames(datatot),ind2keep)
if(plot_heatmap == TRUE){
data.before = datatot[rownames(genescentro), ]
data.before = data.before-rowMeans(data.before)
data.after = data[rownames(genescentro), ]
data.after = data.after-rowMeans(data.after)
cl = cl[colnames(data.after)]
nbclasses = nlevels(as.factor(cltot))
if(is.null(col_purePop)){
BaseColors <- stats::setNames(grDevices::rainbow(nbclasses), levels(as.factor(cltot)))
} else {
BaseColors = col_purePop
}
SelectionColors.after = BaseColors[levels(as.factor(as.character(cl)))]
annot1 = data.frame(Classes = sort(cl))
ha1 = HeatmapAnnotation(df = annot1,
col = list("Classes" = SelectionColors.after))
ht1 = ComplexHeatmap::Heatmap(data.after[,names(sort(cl))], circlize::colorRamp2(c(-2, 0, 2), c("blue", "white", "red")), cluster_columns = FALSE,show_column_names = T, cluster_rows = FALSE,row_names_side="left",column_names_side="top", name = "Expression",top_annotation = ha1, row_names_gp = grid::gpar(fontsize = 6),column_names_gp = grid::gpar(fontsize = 7), column_title = "Samples kept")
genesclassList = genescentroRes$marksList
genesclass = unlist(genesclassList)
names(genesclass) = rep(names(genesclassList), lapply(genesclassList, length))
if(sum(duplicated(genesclass))>0){
genesdup = genesclass[ which(duplicated(genesclass))]
genesclassuniq = genesclass[-which(duplicated(genesclass))]
names(genesclassuniq)[which(genesclassuniq %in% genesdup)]="notspecific"
genesclass=genesclassuniq
}
annotgenes = data.frame("Classes_marker" = names(genesclass)[match(rownames(data.before),genesclass)])
hr = rowAnnotation(df = annotgenes, col = list("Classes_marker" = c(SelectionColors.after,"notspecific"="grey")),width = unit(1, "cm"))
if(length(indremoved)!=0){
if(length(indremoved)==1){
data.removed = data.frame(data.before[,indremoved])
rownames(data.removed) = rownames(data.before)
colnames(data.removed) =indremoved
cl.removed = cltot[indremoved]
SelectionColors.removed = BaseColors[levels(as.factor(as.character(cl.removed)))]
annot2 = data.frame(Classes = cl.removed)
ha2 = HeatmapAnnotation(df = annot2,
col = list("Classes" = SelectionColors.removed))
ht2 = ComplexHeatmap::Heatmap(data.removed, circlize::colorRamp2(c(-2, 0, 2), c("blue", "white", "red")), cluster_columns = FALSE,show_row_names = FALSE,show_column_names = T, cluster_rows = FALSE,column_names_side="top",top_annotation = ha2, column_names_gp = grid::gpar(fontsize = 7), show_heatmap_legend = FALSE,column_title = "Samples removed")
} else {
data.removed = data.before[,indremoved]
cl.removed = cltot[indremoved]
SelectionColors.removed = BaseColors[levels(as.factor(as.character(cl.removed)))]
annot2 = data.frame(Classes = sort(cl.removed))
ha2 = HeatmapAnnotation(df = annot2,
col = list("Classes" = SelectionColors.removed))
ht2 = ComplexHeatmap::Heatmap(data.removed[,names(sort(cl.removed))], circlize::colorRamp2(c(-2, 0, 2), c("blue", "white", "red")), cluster_columns = FALSE,show_row_names = FALSE,show_column_names = T, cluster_rows = FALSE,column_names_side="top",top_annotation = ha2, column_names_gp = grid::gpar(fontsize = 7), show_heatmap_legend = FALSE,column_title = "Samples removed")
}
ComplexHeatmap::draw(hr+ht1+ht2,gap = unit(1.5, "cm"))
names(indremoved) = cltot[indremoved]
} else {
ComplexHeatmap::draw(hr+ht1)
}
}
names(ind2keep) = cltot[ind2keep]
if(nb_markers_selection == "optim_kappa"){
message("Creation of the file \'table_with_allConditionNumber.txt\'")
}
return(list("genescentro" = genescentro, "indkept"=ind2keep, "indremoved"=indremoved, "genesclasses" = genescentroRes$marksList))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.