#' Build CCF Matrix for samples
#' @name ccfMatBuild
#' @param samplelist samplename list, eg., TCGA-05-1425
#' @param upper ccf upper bound
#' @param input_folder ccf files folder path
#' @param binwidth bin width
#' @param random output randomized ccf matrix
#' @return ccfBandCountsMat,ccfBandCountsMat.random,ccfBandFractionMat and ccfBandFractionMat.random
#' @export
#' @importFrom NMF randomize
ccfMatBuild <- function(samplenamelist,upper=1,input_folder,binwidth=0.01,random=FALSE){
n_sample <- length(samplelist)
rows <- upper/binwidth + 1
ccfBand <- seq(0,upper,length.out = rows)
ccfBandCountsMat <- matrix(nrow = rows,ncol = n_sample)
if (n_sample == 0) stop("The number of samples is 0")
for (i in 1:n_sample){
sample_name <- as.character(samplelist[i])
ssm <- load_ccf(sample_name,input = input_folder)
matchBandCountDf <- data.frame(Var1=as.character(1:rows))
matchBandCountDf <- suppressWarnings(left_join(matchBandCountDf,as.data.frame(table(findInterval(ssm$ccube_ccf,ccfBand))),stringAsFactors = F,by="Var1"))
ccfBandCountsMat[,i] <- matchBandCountDf$Freq
}
if (random==TRUE){
if (n_sample == 1) {
ccfBandFractionMat <- ccfBandCountsMat/sum(ccfBandCountsMat)
ccfBandFractionMat.random <- ccfBandFractionMat[sample(1:(rows-1))]
ccfBandCountsMat.random <- ccfBandCountsMat[sample(1:(rows-1))]
} else {
ccfBandFractionMat <- apply(ccfBandCountsMat,2,function(x) x/sum(x))
ccfBandCountsMat.random <- randomize(ccfBandCountsMat)
ccfBandFractionMat.random <- apply(ccfBandCountsMat.random,2,function(x) x/sum(x))
}
outputlist <- list(ccfBandCountsMat,ccfBandCountsMat.random,ccfBandFractionMat,ccfBandFractionMat.random)
} else {
if (n_sample == 1) {
ccfBandFractionMat <- ccfBandCountsMat/sum(ccfBandCountsMat)
} else {
ccfBandFractionMat <- apply(ccfBandCountsMat,2,function(x) x/sum(x))
}
outputlist <- list(ccfBandCountsMat,ccfBandFractionMat)
}
return(outputlist)
}
#' Build count matrix for input samples per cancer type
#' @name ccfMatrixBuild
#' @param post_summary samples' name
#' @param ccfupper set CCF upper bound
#' @param input_folder folder path stores ccf files
#' @param genelist specify whether to only count for certain genes
#' @param output output folder path
#' @param RankEstimateType output rank estimate matirx format
#' @return CCF matrix for each cancer type
#' @export
#' @import dplyr
ccfMatBuild_output <- function(post_summary,input_folder,output=NA,ccfupper=1,RankEstimateType="fraction",add_samplename=TRUE,minsample=30){
samplelist_all <- post_summary$samplename
type <- post_summary %>% group_by(.data$cancertype)%>% summarize(n=n())
typelist <- as.character(subset(type,n>=30)$cancertype)
ntype <- length(typelist)
cat(paste0("\n Start constructing ccf count matrix for ",ntype," types (>",minsample," samples) \n"))
if (!is.na(output)) {
ccfOutput_path <- paste0(output,"ccfMat/")
ccfOutput_all_path <- paste0(output,"ccfMat/All/")
ccfOutput_rank_path <- paste0(output,"ccfMat/rank_estimate/")
multi_dir_create(c(ccfOutput_all_path,ccfOutput_rank_path))
for (i in 1:ntype){
type <- typelist[i]
cat(paste0("\n >>>> loading ",i,'th type - ',type," <<<< \n"))
samplelist_type <- subset(post_summary,cancertype==type)$samplename
n_sample <- length(samplelist_type)
ccfBandCountsMat <- suppressWarnings(ccfMatBuild(samplelist_type,input_folder = input_folder,upper=ccfupper,add_samplename = add_samplename))
ccfCountMatrix <-ccfBandCountsMat[[1]]
ccfCountsMatrix.random <- ccfBandCountsMat[[2]]
ccfFractionMatrix <- ccfBandCountsMat[[3]]
ccfFractionMatrix.random <- ccfBandCountsMat[[4]]
# create directio
multi_dir_create(paste0(ccfOutput_path,type,"/"))
# Output Matrix
output_format <- paste0(ccfOutput_path,type,"/",type,"_", n_sample,"_0-",ccfupper)
output_format_rank <- paste0(ccfOutput_rank_path,type,"_", n_sample,"_0-",ccfupper)
save(ccfCountMatrix,file=paste0(output_format,"_ccfCountMatrix_",Sys.Date(),".RData"))
save(ccfCountsMatrix.random,file=paste0(output_format,"_ccfCountMatrix.random_",Sys.Date(),".RData"))
save(ccfFractionMatrix,file=paste0(output_format,"_ccfFractionMatrix_",Sys.Date(),".RData"))
save(ccfFractionMatrix.random,file=paste0(output_format,"_ccfFractionMatrix.random_",Sys.Date(),".RData"))
# output to rank estimate folder
if (RankEstimateType=="fraction") {
write.csv(ccfFractionMatrix.random,file=paste0(output_format_rank,"_ccfFractionMatrix.random_",Sys.Date(),".csv"))
write.csv(ccfFractionMatrix,file=paste0(output_format_rank,"_ccfFractionMatrix_",Sys.Date(),".csv"))
}
if (RankEstimateType=="count") {
write.csv(ccfCountsMatrix.random,file=paste0(output_format_rank,"_ccfCountMatrix.random_",Sys.Date(),".csv"))
write.csv(ccfCountMatrix,file=paste0(output_format_rank,"_ccfCountMatrix_",Sys.Date(),".csv"))
}
}
}
# ouput ccf matrix for all samples
ccfBandCountsMat_all <- suppressWarnings(ccfMatBuild(samplelist_all,input_folder = input_folder,upper=ccfupper,add_samplename = add_samplename))
ccfCountMatrix_all <- ccfBandCountsMat_all[[1]]
ccfFractionMatrix_all <- ccfBandCountsMat_all[[3]]
output_all_format <- paste0(ccfOutput_all_path,"All_",length(samplelist_all),"_0-",ccfupper)
# Output Matrix
save(ccfCountMatrix_all,file=paste0(output_all_format,"_ccfCountMatrix_",Sys.Date(),".RData"))
save( ccfFractionMatrix_all,file=paste0(output_all_format,"_ccfFractionMatrix_",Sys.Date(),".RData"))
cat(paste0("\n Done, saved results to ",output))
}
#' Combine signatures from different cancer type together
#' @name combine_sig_nmf
#' @param input_folder folder stores signatures for each type
#' @param cancertype cancer type list
#' @param output_folder output_folder
#' @export
#' @return matrix combining all signature matrix
combine_sig_nmf <- function(input_folder,output_folder=NA,cancertype){
for (i in 1:length(cancertype)){
tryCatch({
type <- cancertype[i]
cat(paste0("-> loading CCF matirx for ",i,"th type : ",type),"\n")
sig_file <- dir(paste0(input_folder,type,"/"))[grep("sig",dir(paste0(input_folder,type,"/")))]
load(paste0(input_folder,type,"/",sig_file))
colnames(sig) <- paste0(type,"_sig",1:ncol(sig))
if (i==1) {
combine_sig <- sig
} else {
combine_sig <- cbind(combine_sig,sig)
}
},error=function(e) print("Fail load this type"))
}
if (!is.na(output_folder)) save(combine_sig,file=paste0(output_folder,"combine_sig_",Sys.Date(),".RData"))
combine_sig
}
#' Perform NMF with consensus signature
#' @param ccfMat ccf matrix for all samples
#' @param consensus_sig consensus signature matrix(columns as signaguture,row as bins)
#' @param samplelist input sample list
#' @param output output folder path
#' @return exposure
#' @export
#' @importFrom YAPSA LCD
Extract_sig <- function(ccfMat,consensus_sig,output=NA){
n_sig <- ncol(consensus_sig)
Mat <- t(ccfMat[,1:100])
Mat[is.na(Mat)] <- 0
EvoDynamics_exposure <- as.data.frame(t(LCD(Mat,consensus_sig))) %>%
cbind(.,apply(.[,1:n_sig],2,function(x) x/sum(x))) %>%
set_colnames(c(paste0("Evo_sig_",1:n_sig),paste0("Evo_sig_",1:n_sig,"_proportion"))) %>%
mutate(samplename=ccfMat[,101]) %>%
file_format(n_sig+1)
if (!is.na(output)) {
if (!dir.exists(output)) {
dir.create(output)
}
save(EvoDynamics_exposure,file=paste0(output,"evoDynamic_exposure.RData"))
}
EvoDynamics_exposure
}
#' Perform Hierarchical clustering number estimate
#' @name hc_cluster_test ccfMat ccf matrix for all samples
#' @param data combined signatures for all cancer types
#' @param methods clustering method
#' @param distance distance funciton
#' @param min minimum clustering number
#' @param max maximum clustering number
#' @return exposure
#' @import NbClust
#' @export
hc_cluster_test <- function(data,methods,distance,min = 2,max = 10){
getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
tabla = as.data.frame(matrix(ncol = length(distance), nrow = length(methods)))
names(tabla) = distance
for (j in 2:length(distance))
for(i in 1:length(methods)){
tryCatch({
nb = NbClust(data,distance = distance[j],
min.nc = min, max.nc = max,
method = "complete", index =methods[i])
tabla[i,j] = nb$Best.nc[1]
tabla[i,1] = methods[i]
},error=function(e) print("error"))
}
tabla <- rbind(tabla,c("Most_common",apply(tabla[,2:5],2,getmode)))
return(tabla)
}
#' Combine signatures and estimate number of clusters
#' @name hc_combine_sig Combine signatures and estimate number of clusters
#' @param ccfMatrix ccf matrix for all samples
#' @param distance distance function
#' @param input_folder input folder
#' @param output_folder output folder path
#' @param min.nc minimum cluster
#' @param max.nc maximum cluster
#' @param cancertype cancer types
#' @return list(combine_sig,cluster)
#' @import dplyr
#' @import NbClust
#' @importFrom magrittr %>% set_colnames
#' @export
hc_combine_sig <- function(input_folder,output_folder,cancertype,min.nc=3,max.nc=10,method="ward.D2",index="hubert",distance="euclidean"){
cat(red("Start run hierarchical clustering to extract consensus signature. \n"))
combine_sig <- combine_sig_nmf(input_folder=input_folder,output_folder=output_folder,cancertype=cancertype)
if (index=="hubert") {
pdf(paste0(output_folder,"hc_hubert_plot_",Sys.Date(),".pdf"))
res.nbclust <- NbClust(t(combine_sig),min.nc = min.nc, max.nc = max.nc,method=method,index = index,distance=distance)
second_diff = res.nbclust$All.index[2:(max.nc-min.nc+1)]-res.nbclust$All.index[1:(max.nc-min.nc)]
cluster = as.numeric(which(second_diff==max(second_diff)))+min.nc
print(paste0("The suggested number of clusters from the Hubert index was - ",cluster))
cat(paste0("-> The Hubert index was saved as: ",paste0(output_folder,"hc_hubert_plot.pdf \n")))
dev.off()
}
NbClust(t(combine_sig),min.nc = min.nc, max.nc = max.nc,method=method,index = index,distance=distance)
return(list(combine_sig,cluster))
}
#' Plotting for HC results
#' @name hc_consensus ccfMat ccf matrix for all samples
#' @param combine_sig combined signatures for all cancer types
#' @param cluster clustering number
#' @param output_folder output folder path
#' @param distance distance function
#' @return consensus_sig
#' @import pheatmap
#' @importFrom cowplot save_plot
#' @import dplyr
#' @importFrom magrittr %>% set_colnames
#' @importFrom grDevices colorRampPalette
#' @importFrom RColorBrewer brewer.pal
#' @export
hc_consensus_sig <- function(combine_sig,cluster,output_folder,method="ward.D2",index="hubert",distance="euclidean"){
combine_sig <- apply(combine_sig,2,function(x) x/sum(x))
upper = quantile(as.matrix(combine_sig),0.95);breaksList = seq(0, upper, by = 0.01)
col <- colorRampPalette(rev(brewer.pal(n = cluster, name = "RdYlBu")))(length(breaksList))
out <- pheatmap(combine_sig, cutree_cols = cluster, fontsize_col = 5,fontsize_row = 0.4,color = col, breaks = breaksList,clustering_distance_cols=distance,cluster_rows=F,filename=paste0(output_folder,distance,"_",cluster,"_hc_heatmap_",Sys.Date(),".pdf"),clustering_method = method)
sig_label <- as.data.frame(cutree(out$tree_col,k=cluster)) %>%
set_colnames("cluster") %>%
mutate(sig=rownames(.))
## Compute consensu signatures for each cluster
consensus_sig <- as.data.frame(t(combine_sig)) %>%
mutate(sig=rownames(.)) %>%
left_join(sig_label,by="sig") %>%
group_by(cluster) %>%
mutate(sig=NULL) %>%
summarise_all(mean)
save(consensus_sig,file=paste0(output_folder,distance,"_",cluster,"_consensus_sig_",Sys.Date(),".RData"))
consensus_sig <- apply(t(consensus_sig[,2:101]),2,as.numeric)
p1 <- plot_grid(sig_plot(consensus_sig))
save_plot(paste0(output_folder,distance,"_",cluster,"_consensus_sig_",Sys.Date(),".pdf"),p1,base_asp = cluster)
sig_plot(consensus_sig)
cat(paste0("-> The consensus signature result was saved at: ",output_folder,"\n"))
return(consensus_sig)
}
#' sig_assignment
#' @name hc_consensus ccfMat ccf matrix for all samples
#' @param signature signature
#' @param ccfMatrix ccfmatrix
#' @param output output folder path
#' @return exposure
#' @import dplyr
#' @importFrom cowplot save_plot
#' @importFrom YAPSA LCD
#' @export
sig_assignment <- function(signature,ccfMatrix,output=NA){
ccfMatrix[is.na(ccfMatrix)] = 0
exposure <-LCD(ccfMatrix,signature)
exposure <- as.data.frame(t(exposure)) %>% set_colnames(paste0("sig_",1:ncol(.)))
if (!is.na(output)) save(exposure,file=paste0(output,"/lcd_exposure",Sys.Date(),".RData"))
return(exposure)
}
#' Plot signature matrix
#' @name sig_plot
#' @param sig signature matrix
#' @return mydata mutation data frame
#' @export
#' @importFrom reshape2 melt
#' @importFrom grid grid.draw
#' @importFrom RColorBrewer brewer.pal
#' @importFrom magrittr %>% set_colnames
#' @import dplyr
sig_plot <- function(sig,tag=NULL,col=NA,theme="grey"){
xx <- as.data.frame(t(apply(sig,2,function(x) x/sum(x)))) %>%
set_colnames(1:ncol(.)) %>%
mutate(signature=paste0("Signature ",1:nrow(.))) %>%
melt(.,id=c("signature"))
if (is.na(col)){
if (ncol(sig)<=8) fills <- brewer.pal(8, "Set3")[c(1,3:8,2)] else
fills <- brewer.pal(ncol(sig), "Set3")[c(1,3:8,2,9:ncol(sig))]
} else {fills <- col}
p1 <- ggplot(xx,aes(y=value,x=variable)) + geom_bar(aes(fill=signature),stat='identity') + scale_fill_manual(values = fills)+ theme_light()+
#ggtitle(paste0("rank = ",rank,", cancertype = ",type, ", MatrixType = ",MatType )) +
theme(legend.title = element_blank(),
legend.position = "none",
strip.text.x = element_text(color= "white",size=10),
panel.grid= element_blank(),
axis.title.x = element_text(color = "grey20", size = 8),
axis.text.x = element_text(color = "grey20", size = 6),
axis.text.y = element_text(color = "grey20", size = 6),
plot.title = element_text(size= 10)) +
facet_grid(cols = vars(signature),scales="free")+ scale_x_discrete(breaks=c("1","50","100") ,labels=c("0", "0.5", "1"))+xlab("Cancer Cell Fraction") + ylab("") +
labs(title="Consensus Signature of evolutionary dynamics",tag=tag)
g1 <- ggplot_gtable(ggplot_build(p1))
strip_both <- which(grepl('strip-', g1$layout$name))
k <- 1
for (i in strip_both) {
j1 <- which(grepl('rect', g1$grobs[[i]]$grobs[[1]]$childrenOrder))
g1$grobs[[i]]$grobs[[1]]$children[[j1]]$gp$fill <- fills[k]
#g1$layout$clip[j3] <- "off"
k <- k+1
}
grid.draw(g1)
return(g1)
}
#' Perform nmf for a cancer type
#' @name nmf_sig_plot_type
#' @param type cancer type
#' @param MatType matrix type
#' @param input_folder ccf file path
#' @param output output file path
#' @param rank rank
#' @return save nmf results in output folder and plot signature for this type
#' @import NMF
#' @importFrom magrittr %>% set_colnames
#' @import dplyr
nmf_sig_plot_type <- function(type,MatType="fraction",input_folder,output,rank,nrun){
type_path <- paste0(input_folder,type,"/")
if (MatType=="fraction") {
file_path <- paste0(input_folder,type,"/",dir(type_path)[grep("ccfFractionMatrix_",dir(type_path))])
}
if (MatType=="count"){
file_path <- paste0(input_folder,type,"/",dir(type_path)[grep("ccfCountMatrix_",dir(type_path))])
}
if (!dir.exists(paste0(output,type))) {
dir.create(paste0(output,type))
}
load(file=file_path)
#format rank summary file
if (exists("ccfFractionMatrix")) ccfMat <- ccfFractionMatrix
if (exists("ccfCountMatrix")) ccfMat <- ccfCountMatrix
n_sample <- ncol(ccfMat)
ccf <- t(apply(ccfMat[1:100,],1,as.numeric))
#preprocess for rows with all 0
index_p <- which(rowSums(ccf)>0)
index_n <- which(!rowSums(ccf)>0)
ccf<- ccf[which(rowSums(ccf)>0),]
#run NMF
res <- nmf(ccf,rank,nrun=nrun,.opt='vp4')
sig <- as.data.frame(matrix(0,nrow=length(index_p)+length(index_n),ncol=ncol(res@fit@W)))
sig[c(index_p),] <- as.data.frame(res@fit@W) %>% set_colnames(paste0("sig_",1:ncol(.)))
expo <- as.matrix(res@fit@H)
#output sig and expo
expo <- as.data.frame(t(expo))
colnames(expo)[1:rank] <- paste0("sig_",1:rank)
save(expo,file=paste0(output,type,'/',type,'_',n_sample,"_expo_",Sys.Date(),".RData"))
save(sig,file=paste0(output,type,'/',type,'_',n_sample,"_sig_",Sys.Date(),".RData"))
save(res,file=paste0(output,type,'/',type,'_',n_sample,"_res_",Sys.Date(),".RData"))
}
#' Perform nmf for multiple cancer types
#' @name nmf_sig_all_plot
#' @param input_folder ccf file path
#' @param output output file path
#' @param rank_summary rank summary file
#' @return save nmf results in output folder and plot signature for all cancer types
#' @export
#' @import dplyr
#' @import NMF
nmf_sig_all_plot <- function(input_folder,output,rank_summary,MatType="fraction",nrun=100) {
if (!dir.exists(output)) dir.create(output)
lapply(1:nrow(rank_summary),function(x) nmf_sig_plot_type(rank_summary[x,1],input_folder=input_folder,output=output,rank=rank_summary[x,2],MatType=MatType,nrun=nrun))
}
#' Plotting rank estimate
#' @name rank_estimate_plot
#' @param outputFolder folder stores rank estimate files
#' @param rankfilepath rank file path to output
#' @return save nmf results in output folder and plot signature for all cancer types
#' @export
#' @importFrom cowplot plot_grid
#' @importFrom gridExtra arrangeGrob grid.arrange
#' @import dplyr
#' @import ggplot2
#' @import reshape2
rank_estimate_plot <- function(outputFolder,rankfilepath,format) {
typelist <- unique(unlist(lapply(dir(outputFolder),function(x) strsplit(x,"_")[[1]][[1]])))
rank_blank <- data.frame(cancertype=typelist,rank=NA,rss_suggested_rank=NA)
i = 1
for (i in 1:length(typelist)) {
tryCatch({
type <- typelist[i]
if (format=="fraction"){
estimate <- read.csv(paste0(outputFolder,type,"_ccfFractionMatrix.csv")) %>% mutate(Data='normal')
estimate_random <- read.csv(paste0(outputFolder,type,"_ccfFractionMatrix.random.csv")) %>% mutate(Data='random')
}
if (format=="count"){
estimate <- read.csv(paste0(outputFolder,type,"_ccfCountMatrix.csv")) %>% mutate(Data='normal')
estimate_random <- read.csv(paste0(outputFolder,type,"_ccfCountMatrix.random.csv")) %>% mutate(Data='random')
}
estimate_rank <- rbind(estimate,estimate_random) %>% filter(rank>2)
xx <- reshape2::melt(estimate_rank,id=c("rank","Data")) %>% mutate(Measure=NA)
xx$variable <- as.character(xx$variable)
xx[which(xx$variable=="dispersion"),]$Measure <- "Best fit"
xx[which(xx$variable=="cophenetic"),]$Measure <- "Consensus"
xx[which(xx$variable=="sparseness1"),]$Measure <- "Basis"
xx[which(xx$variable=="sparseness2"),]$Measure <- "Coefficients"
xx[c(which(xx$variable=="sparseness2"),which(xx$variable=="sparseness1")),]$variable <- "sparseness"
xx[which(xx$variable=="rss"),]$Measure <- "Consensus"
xx <- subset(xx,variable %in% c("cophenetic","dispersion","rss","sparseness"))
min_rank = min(xx$rank)
idx = 1:(nrow(estimate)-1)
rss_decrease <- min_rank + which((estimate[order(estimate$rank),]$rss[idx]-estimate[order(estimate$rank),]$rss[idx+1]) - (estimate_random[order(estimate_random$rank),]$rss[idx]-estimate_random[order(estimate_random$rank),]$rss[idx+1])<0)[1]
rank_blank[which(rank_blank$cancertype == type),]$rss_suggested_rank <- rss_decrease
write.csv(estimate_rank,paste0(outputFolder,type,'_rank_summary.csv'))
command1 <- paste0('g',i,'<-ggplot(data=xx,aes(x=rank,y=value))+ geom_line(aes(lty=Data,color=Measure),cex=1)+geom_point(aes(shape=Data,color=Measure),cex=2)+facet_wrap(~variable,scales="free",nrow=1)+
labs(title=paste0("Rank estimate for ",type),subtitle=paste0("- rss suggested rank = ",rss_decrease))+
scale_x_continuous(breaks = min(xx$rank):max(xx$rank))+theme_grey()+ theme(strip.background = element_rect(fill="orange"),strip.text = element_text(colour ="white",size=14))')
command2 <- paste0("print(g",i,")")
eval(parse(text=command1))
eval(parse(text=command2))
},error=function(e) print("error!"))
}
if (exists("g1")) {
# output signature plot for all cancer types
j <- length(typelist)
n_file <- j %% 8
if (n_file==0) {
n_pdf <- (j %/% 8)
} else {
n_pdf <- (j %/% 8)+1
}
for (i in 1:n_pdf) {
if (i != n_pdf) {
commands1 <- paste0("pdf(file=paste0(outputFolder",",'rank_estimate_g',(8*i-7),'-',8*i,'_',Sys.Date(),'.pdf'),height=25,width=12)")
commands2 <- paste0("p",i,"<- plot_grid(paste0('g',(8*i-7):(8*i),collapse=','),align='V',ncol=1,rel_heights = c(0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1))")
} else {
commands1 <- paste0("pdf(file=paste0(outputFolder",",'rank_estimate_g',(8*i-8),'-',8*i-8+n_file,'_',Sys.Date(),'.pdf'),height=25,width=12)")
commands2 <- paste0("p",i," <- plot_grid(paste0('g',(8*i-8):(8*i-8+n_file),collapse=','),align='V',ncol=1,rel_heights = c(0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1))")
}
commands3 <- paste0("print(p",i,")")
commands4 <- "dev.off()"
eval(parse(text=commands1))
eval(parse(text=commands2))
eval(parse(text=commands3))
eval(parse(text=commands4))
}
write.csv(rank_blank,file=rankfilepath)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.