Nothing
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' CsvToJMat
#'
#' Gets a csv/tsv file and writes to a disk file the binary matrix of counts contained in it in the jmatrix binary format.\cr
#' First line of the .csv is supposed to have the field names.\cr
#' First column of each line is supposed to have the row name.\cr
#' The fields are supposed to be separated by one occurrence of a character-field separator (usually, comma or tab).
#' .tsv files can be read with this function, too, setting the csep argument to '\\t'
#'
#' The parameter transpose has the default value of FALSE. But don't forget to set it to TRUE if you want the cells
#' (which in single cell common practice are by columns) to be written by rows. This will be needed later to calculate
#' the dissimilarity matrix, if this is the next step of your workflow. See help of CalcAndWriteDissimilarityMatrix
#'
#' Special note for loading symmetric matrices:\cr
#' If you use this function to load what you expect to be a symmetric matrix from a .csv file, remember that the input table
#' MUST be square, but only the lower-diagonal matrix will be stored, including the main diagonal. The rest of the input table is
#' completely ignored, except to check that there are values in it. It is not checked if the table really represents a
#' symmetric matrix or not.\cr
#' Furthermore, symmetric matrices can only be loaded in raw mode, i.e.: no normalization is allowed, and they cannot be transposed.
#'
#' @param ifname A string with the name of the .csv/.tsv text file.
#' @param ofname A string with the name of the binary output file.
#' @param mtype A string to indicate the matrix type: 'full', 'sparse' or 'symmetric'. Default: 'sparse'
#' @param csep The character used as separator in the .csv file. Default: ',' (comma) (Set to '\\t' for .tsv)
#' @param ctype The string 'raw' or 'log1' to write raw counts or log(counts+1), or the normalized versions, 'rawn' and 'log1n', which normalize ALWAYS BY COLUMNS (before transposition, if requested to transpose). The logarithm is taken base 2. Default: raw
#' @param valuetype The data type to store the matrix. It must be one of the strings 'uint32', 'float' or 'double'. Default: float
#' @param transpose Boolean to indicate if the matrix should be transposed before writing. See Details for a comment about this. Default: FALSE
#' @param comment A comment to be stored with the matrix. Default: "" (no comment)
#' @return No return value, called for side effects (creates a file)
#' @examples
#' # Since we have no .csv file to test, we will generate one with another function of this package
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' tmpfile2=paste0(tempdir(),"/Rfullfloat2.bin")
#' tmpcsvfile1=paste0(tempdir(),"/Rfullfloat.csv")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' JMatToCsv(tmpfile1,tmpcsvfile1)
#' CsvToJMat(tmpcsvfile1,tmpfile2)
#' # It can be checked that files Rfullfloat.bin and Rfullfloat2.bin contain the same data
#' # (even though they differ in the comment, which has been eliminated when converting to csv)
#' @export
CsvToJMat <- function(ifname,
                      ofname,
                      mtype = "sparse",
                      csep = ',',
                      ctype = "raw",
                      valuetype = "float",
                      transpose = FALSE,
                      comment = "") {
  # Pass every argument, in declaration order, to the native routine
  # referenced by `_parallelpam_CsvToJMat`; its return value is kept out of
  # the console by returning it invisibly.
  status <- .Call(
    `_parallelpam_CsvToJMat`,
    ifname, ofname, mtype, csep, ctype, valuetype, transpose, comment
  )
  invisible(status)
}
#' JMatToCsv
#'
#' Writes a binary matrix in the jmatrix package format as a .csv file. This is mainly for checking/inspection and
#' to load the data from R as read.csv, if the memory of having all data as doubles allows doing such thing.
#'
#' The numbers are written to text with as many decimal places as allowed by its data type (internally obtained
#' with std::numeric_limits<type>::max_digits10)\cr
#' NOTE ON READING FROM R: to read the .csv files exported by this function you MUST use the R function read.csv
#' (not read.table) AND set its argument row.names to 1, since we always write a first column with the row names,
#' even if the binary matrix does not store them; in this case they are simply "1","2",...
#'
#' @param ifile String with the file name that contains the binary data.
#' @param csvfile String with the file name that will contain the data as csv.
#' @param csep Character used as separator. Default: , (comma)
#' @param withquotes boolean to mark if row and column names in the .csv file must be written surrounded by double quotes. Default: FALSE
#' @return No return value, called for side effects (creates a file)
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' tmpcsvfile1=paste0(tempdir(),"/Rfullfloat.csv")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' JMatToCsv(tmpfile1,tmpcsvfile1)
#' @export
JMatToCsv <- function(ifile,
                      csvfile,
                      csep = ',',
                      withquotes = FALSE) {
  # Hand the four arguments to the native routine referenced by
  # `_parallelpam_JMatToCsv` and return its result invisibly.
  outcome <- .Call(`_parallelpam_JMatToCsv`, ifile, csvfile, csep, withquotes)
  invisible(outcome)
}
#' @importFrom memuse Sys.meminfo Sys.swapinfo
NULL
#' ParallelpamSetDebug
#'
#' Sets debugging in parallelpam package to ON (with TRUE) or OFF (with FALSE) for both parts of it.\cr
#' On package load the default status is OFF.\cr
#' Setting debugging of any part to ON shows a message. Setting to OFF does not show anything (since debugging is OFF...)
#'
#' @param deb boolean, TRUE to generate debug messages for the PAM algorithm and silhouette calculation and FALSE to turn them off. Default: TRUE.
#' @param debjmat boolean, TRUE to generate debug messages for the jmatrix part inside this package and FALSE to turn them off. Default: FALSE
#' @return No return value, called for side effects (internal boolean flag changed)
#' @examples
#' ParallelpamSetDebug(TRUE,debjmat=TRUE)
#' ParallelpamSetDebug(TRUE,debjmat=FALSE)
#' @export
ParallelpamSetDebug <- function(deb = TRUE,
                                debjmat = FALSE) {
  # Forward both flags to the native routine referenced by
  # `_parallelpam_ParallelpamSetDebug`; the returned value is not printed.
  ack <- .Call(`_parallelpam_ParallelpamSetDebug`, deb, debjmat)
  invisible(ack)
}
#' CalcAndWriteDissimilarityMatrix
#'
#' Writes a binary symmetric matrix with the dissimilarities between ROWS of the data stored in a binary matrix in the jmatrix/parallelpam package format.\cr
#' The input matrix of vectors can be a full or a sparse matrix and the algorithm has been modified to calculate faster for sparse matrices.\cr
#' Output matrix type can be float or double type (but look at the comments in 'Details').
#'
#' The parameter restype forces the output to be a matrix of either floats or doubles. Precision of float is normally good enough; but if you need
#' double precision (maybe because you expect your results to be in a large range, two to three orders of magnitude), change it.\cr
#' Nevertheless, notice that this is at the expense of double memory usage, which is QUADRATIC with the number of individuals (rows) in your input matrix.
#'
#' @param ifname A string with the name of the file containing the counts as a binary matrix.
#' @param ofname A string with the name of the binary output file to contain the symmetric dissimilarity matrix.
#' @param distype The dissimilarity to be calculated. It must be one of these strings: 'L1', 'L2', 'Pearson', 'Cos' or 'WEuc'.\cr
#' Respectively: L1 (Manhattan), L2 (Euclidean), Pearson (Pearson dissimilarity), Cos (cosine distance), WEuc (weighted Euclidean, with inverse-stdevs as weights).\cr
#' Default: 'L2'.
#' @param restype The data type of the result. It can be one of the strings 'float' or 'double'. Default: float (and don't change it unless you REALLY need to...).
#' @param comment Comment to be added to the dissimilarity matrix. Default: "" (no comment)
#' @param nthreads Number of threads to be used for the parallel calculations with this meaning:\cr
#' -1: don't use threads.\cr
#' 0: let the function choose according to the number of rows and to the number of available cores.\cr
#' Any positive number > 1: use that number of threads. You can use even more than cores, but this is discouraged and raises a warning.\cr
#' Default: 0.
#' @return No return value, called for side effects (creates a file)
#' @examples
#' Rf <- matrix(runif(50000),nrow=100)
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",
#' comment="Full matrix of floats, 100 rows, 500 columns")
#' JMatInfo(tmpfile1)
#' tmpdisfile1=paste0(tempdir(),"/RfullfloatDis.bin")
#' # Distance file calculated from the matrix stored as full
#' CalcAndWriteDissimilarityMatrix(tmpfile1,tmpdisfile1,distype="L2",
#' restype="float",comment="L2 distance matrix from full",nthreads=0)
#' JMatInfo(tmpdisfile1)
#' tmpfile2=paste0(tempdir(),"/Rsparsefloat.bin")
#' JWriteBin(Rf,tmpfile2,dtype="float",dmtype="sparse",
#' comment="Sparse matrix of floats, 100 rows, 500 columns")
#' JMatInfo(tmpfile2)
#' # Distance file calculated from the matrix stored as sparse
#' tmpdisfile2=paste0(tempdir(),"/RsparsefloatDis.bin")
#' CalcAndWriteDissimilarityMatrix(tmpfile2,tmpdisfile2,distype="L2",
#' restype="float",comment="L2 distance matrix from sparse",nthreads=0)
#' JMatInfo(tmpdisfile2)
#' # Read both versions
#' Dfu<-GetJManyRows(tmpdisfile1,c(1:nrow(Rf)))
#' Dsp<-GetJManyRows(tmpdisfile2,c(1:nrow(Rf)))
#' # and compare them
#' max(Dfu-Dsp)
#' @export
CalcAndWriteDissimilarityMatrix <- function(ifname,
                                            ofname,
                                            distype = "L2",
                                            restype = "float",
                                            comment = "",
                                            nthreads = 0L) {
  # Delegate to the native routine referenced by
  # `_parallelpam_CalcAndWriteDissimilarityMatrix`, passing the arguments in
  # declaration order, and return its result invisibly.
  done <- .Call(
    `_parallelpam_CalcAndWriteDissimilarityMatrix`,
    ifname, ofname, distype, restype, comment, nthreads
  )
  invisible(done)
}
#' FilterJMatByName
#'
#' Takes a jmatrix binary file containing a table with rows and columns and filters it by name, eliminating the rows or columns whose names are not in a certain list
#'
#' If the table has no list of names in the requested dimension (rows or columns), an error is raised.\cr
#' The row or column names whose names are not found obviously cannot remain, and the program raises a warning indicating for which row/column names this happens.\cr
#' The matrix contained in the filtered file will have the same nature (full or sparse) and the same data type as the original.\cr
#' This function can be used to filter either by row or by column name, with appropriate usage of parameter namesat
#'
#' @param fname A string with the file name of the original table
#' @param Gn A list of R strings with the names of the rows or columns that must remain. All others will be filtered out
#' @param filname A string with the file name of the filtered table
#' @param namesat The string "rows" or "cols" indicating if the searched names are in the rows or in the columns of the original table. Default: "rows"
#' @return No return value, called for side effects (creates a file)
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' tmpfile2=paste0(tempdir(),"/Rfullfloatrowfilt.bin")
#' tmpfile3=paste0(tempdir(),"/Rfullfloatrowcolfilt.bin")
#' tmpcsvfile1=paste0(tempdir(),"/Rfullfloat.csv")
#' tmpcsvfile3=paste0(tempdir(),"/Rfullfloatrowcolfilt.csv")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' # Let's keep only rows A, C and E
#' FilterJMatByName(tmpfile1,c("A","C","E"),tmpfile2,namesat="rows")
#' # and from the result, let's keep only columns b, d and g
#' FilterJMatByName(tmpfile2,c("b","d","g"),tmpfile3,namesat="cols")
#' JMatToCsv(tmpfile1,tmpcsvfile1)
#' JMatToCsv(tmpfile3,tmpcsvfile3)
#' # You can now compare both ASCII/csv files
#' @export
FilterJMatByName <- function(fname,
                             Gn,
                             filname,
                             namesat = "rows") {
  # Forward the arguments to the native routine referenced by
  # `_parallelpam_FilterJMatByName`; the value it returns is made invisible.
  filtered <- .Call(`_parallelpam_FilterJMatByName`, fname, Gn, filname, namesat)
  invisible(filtered)
}
#' FilterBySilhouetteQuantile
#'
#' Takes a silhouette, as returned by CalculateSilhouette, the list of medoids and class assignments, as returned by ApplyPAM,
#' a quantile and the matrices of values and dissimilarities and constructs the corresponding matrices clearing off the points whose silhouette is
#' below the lower quantile, except if they are medoids.\cr
#'
#' The renumbering of indices in the returned cluster may seem confusing at first but it was the way of fitting this with the rest
#' of the package. Anyway, notice that if the numeric vectors in the input parameter L were named vectors, the point names are appropriately kept
#' in the result so point identity is preserved. Moreover, if the values and dissimilarity input matrices had row and/or column names, they
#' are preserved in the filtered matrices, too.
#'
#' @param s A numeric vector with the silhouette coefficient of each point in a classification, as returned by CalculateSilhouette.
#' @param L A list of two numeric vectors, L$med and L$clasif, obtained normally as the object returned by ApplyPAM.
#' @param fallcounts A string with the name of the binary file containing the matrix of data per point. It can be either a full or a sparse matrix.
#' @param ffilcounts A string with the name of the binary file that will contain the selected points. It will have the same character (full/sparse) and type of the complete file.
#' @param falldissim A string with the name of the binary file containing the dissimilarity matrix of the complete set of points. It must be a symmetric matrix.
#' @param ffildissim A string with the name of the binary file that will contain the dissimilarity matrix for the remaining points. It will be a symmetric matrix.
#' @param q Quantile to filter. All points whose silhouette is below this quantile will be filtered out. Default: 0.2
#' @param addcom Boolean to indicate if a comment must be appended to the current comment of values and dissimilarity matrices to indicate that they are the result of a filtering process. This comment is automatically generated and contains the value of quantile q. Successive applications add comments at the end of those already present. Default: TRUE
#' @return Lr["med","clasif"] A list of two numeric vectors.\cr
#' Lr$med is a modification of the corresponding first element of the passed L parameter.\cr
#' Lr$clasif has as many components as remaining instances.\cr
#' Since points will have been removed, medoid numbering is modified. Therefore, Lr$med has the NEW index of each medoid in the filtered set.\cr
#' Lr$clasif contains the number of the medoid (i.e.: the cluster) to which each instance has been assigned, and therefore does not change.\cr
#' All indexes start at 1 (R convention). Please, see Details section\cr
#'
#' @examples
#' # Synthetic problem: 10 random seeds with coordinates in [0..20]
#' # to which random values in [-0.1..0.1] are added
#' M<-matrix(0,100,500)
#' rownames(M)<-paste0("rn",c(1:100))
#' for (i in (1:10))
#' {
#' p<-20*runif(500)
#' Rf <- matrix(0.2*(runif(5000)-0.5),nrow=10)
#' for (k in (1:10))
#' {
#' M[10*(i-1)+k,]=p+Rf[k,]
#' }
#' }
#' tmpfile1=paste0(tempdir(),"/pamtest.bin")
#' JWriteBin(M,tmpfile1,dtype="float",dmtype="full")
#' tmpdisfile1=paste0(tempdir(),"/pamDl2.bin")
#' CalcAndWriteDissimilarityMatrix(tmpfile1,tmpdisfile1,distype="L2",restype="float",nthreads=0)
#' L <- ApplyPAM(tmpdisfile1,10,init_method="BUILD")
#' # Which are the medoids
#' L$med
#' sil <- CalculateSilhouette(L$clasif,tmpdisfile1)
#' tmpfiltfile1=paste0(tempdir(),"/pamtestfilt.bin")
#' tmpfiltdisfile1=paste0(tempdir(),"/pamDL2filt.bin")
#' Lf<-FilterBySilhouetteQuantile(sil,L,tmpfile1,tmpfiltfile1,tmpdisfile1,tmpfiltdisfile1,
#' q=0.4,addcom=TRUE)
#' # The new medoids are the same points but renumbered, since the L$clasif array has less points
#' Lf$med
#' @export
FilterBySilhouetteQuantile <- function(s,
                                       L,
                                       fallcounts,
                                       ffilcounts,
                                       falldissim,
                                       ffildissim,
                                       q = 0.2,
                                       addcom = TRUE) {
  # Delegate to the native routine referenced by
  # `_parallelpam_FilterBySilhouetteQuantile` and return its value visibly.
  filtered_clustering <- .Call(
    `_parallelpam_FilterBySilhouetteQuantile`,
    s, L, fallcounts, ffilcounts, falldissim, ffildissim, q, addcom
  )
  filtered_clustering
}
#' FilterBySilhouetteThreshold
#'
#' Takes a silhouette, as returned by CalculateSilhouette, the list of medoids and class assignments, as returned by ApplyPAM,
#' a threshold and the matrices of values and dissimilarities and constructs the corresponding matrices clearing off the points whose silhouette is
#' below the threshold, except if they are medoids.\cr
#'
#' The renumbering of indices in the returned cluster may seem confusing at first but it was the way of fitting this with the rest
#' of the package. Anyway, notice that if the numeric vectors in the input parameter L were named vectors, the point names are appropriately kept
#' in the result so point identity is preserved. Moreover, if the values and dissimilarity input matrices had row and/or column names, they
#' are preserved in the filtered matrices, too.
#'
#' @param s A numeric vector with the silhouette coefficient of each point in a classification, as returned by CalculateSilhouette.
#' @param L A list of two numeric vectors, L$med and L$clasif, obtained normally as the object returned by ApplyPAM.
#' @param fallcounts A string with the name of the binary file containing the matrix of values per point. It can be either a full or a sparse matrix.
#' @param ffilcounts A string with the name of the binary file that will contain the selected points. It will have the same character (full/sparse) and type of the complete file.
#' @param falldissim A string with the name of the binary file containing the dissimilarity matrix of the complete set of points. It must be a symmetric matrix.
#' @param ffildissim A string with the name of the binary file that will contain the dissimilarity matrix for the remaining points. It will be a symmetric matrix.
#' @param thres Threshold to filter. All points whose silhouette is below this threshold will be filtered out. Default: 0.0 (remember that silhouette is in [-1..1])
#' @param addcom Boolean to indicate if a comment must be appended to the current comment of values and dissimilarity matrices to indicate that they are the result of a filtering process. This comment is automatically generated and contains the value of the threshold thres. Successive applications add comments at the end of those already present. Default: TRUE
#' @return Lr["med","clasif"] A list of two numeric vectors.\cr
#' Lr$med is a modification of the corresponding first element of the passed L parameter.\cr
#' Lr$clasif has as many components as remaining instances.\cr
#' Since points will have been removed, medoid numbering is modified. Therefore, Lr$med has the NEW index of each medoid in the filtered set.\cr
#' Lr$clasif contains the number of the medoid (i.e.: the cluster) to which each instance has been assigned, and therefore does not change.\cr
#' All indexes start at 1 (R convention). Please, see Details section\cr
#'
#' @examples
#' # Synthetic problem: 10 random seeds with coordinates in [0..20]
#' # to which random values in [-0.1..0.1] are added
#' M<-matrix(0,100,500)
#' rownames(M)<-paste0("rn",c(1:100))
#' for (i in (1:10))
#' {
#' p<-20*runif(500)
#' Rf <- matrix(0.2*(runif(5000)-0.5),nrow=10)
#' for (k in (1:10))
#' {
#' M[10*(i-1)+k,]=p+Rf[k,]
#' }
#' }
#' tmpfile1=paste0(tempdir(),"/pamtest.bin")
#' JWriteBin(M,tmpfile1,dtype="float",dmtype="full")
#' tmpdisfile1=paste0(tempdir(),"/pamDl2.bin")
#' CalcAndWriteDissimilarityMatrix(tmpfile1,tmpdisfile1,distype="L2",restype="float",nthreads=0)
#' L <- ApplyPAM(tmpdisfile1,10,init_method="BUILD")
#' # Which are the medoids
#' L$med
#' sil <- CalculateSilhouette(L$clasif,tmpdisfile1)
#' tmpfiltfile1=paste0(tempdir(),"/pamtestfilt.bin")
#' tmpfiltdisfile1=paste0(tempdir(),"/pamDL2filt.bin")
#' Lf<-FilterBySilhouetteThreshold(sil,L,tmpfile1,tmpfiltfile1,tmpdisfile1,tmpfiltdisfile1,
#' thres=0.4,addcom=TRUE)
#' # The new medoids are the same points but renumbered, since the L$clasif array has less points
#' Lf$med
#' @export
FilterBySilhouetteThreshold <- function(s,
                                        L,
                                        fallcounts,
                                        ffilcounts,
                                        falldissim,
                                        ffildissim,
                                        thres = 0.0,
                                        addcom = TRUE) {
  # Delegate to the native routine referenced by
  # `_parallelpam_FilterBySilhouetteThreshold` and return its value visibly.
  filtered_clustering <- .Call(
    `_parallelpam_FilterBySilhouetteThreshold`,
    s, L, fallcounts, ffilcounts, falldissim, ffildissim, thres, addcom
  )
  filtered_clustering
}
#' ClassifAsDataFrame
#'
#' Returns the results of the classification returned by ApplyPAM as a R dataframe
#'
#' The dataframe has three columns: PointName (name of each point), NNPointName (name of the point which is the center of the cluster to which PointName belongs)
#' and NNDistance (distance between the points PointName and NNPointName).
#' Medoids are identified by the fact that PointName and NNPointName are equal, or equivalently, NNDistance is 0.
#'
#' @param L The list returned by ApplyPAM with fields L$med and\cr
#' L$clasif with the numbers of the medoids and the classification of each point
#' @param fdist The binary file containing the symmetric matrix with the dissimilarities between points (usually, generated by
#' a call to CalcAndWriteDissimilarityMatrix).
#' @return Df Dataframe with columns PointName, NNPointName and NNDistance. See Details for description.
#' @examples
#' # Synthetic problem: 10 random seeds with coordinates in [0..20]
#' # to which random values in [-0.1..0.1] are added
#' M<-matrix(0,100,500)
#' rownames(M)<-paste0("rn",c(1:100))
#' for (i in (1:10))
#' {
#' p<-20*runif(500)
#' Rf <- matrix(0.2*(runif(5000)-0.5),nrow=10)
#' for (k in (1:10))
#' {
#' M[10*(i-1)+k,]=p+Rf[k,]
#' }
#' }
#' tmpfile1=paste0(tempdir(),"/pamtest.bin")
#' JWriteBin(M,tmpfile1,dtype="float",dmtype="full")
#' tmpdisfile1=paste0(tempdir(),"/pamDL2.bin")
#' CalcAndWriteDissimilarityMatrix(tmpfile1,tmpdisfile1,distype="L2",restype="float",nthreads=0)
#' L <- ApplyPAM(tmpdisfile1,10,init_method="BUILD")
#' df <- ClassifAsDataFrame(L,tmpdisfile1)
#' df
#' # Identification of medoids:
#' which(df[,3]==0)
#' # Verification they are the same as in L (in different order)
#' L$med
#' @export
ClassifAsDataFrame <- function(L,
                               fdist) {
  # Forward both arguments to the native routine referenced by
  # `_parallelpam_ClassifAsDataFrame` and return what it produces.
  classif_table <- .Call(`_parallelpam_ClassifAsDataFrame`, L, fdist)
  classif_table
}
#' ApplyPAM
#'
#' A function to implement the Partitioning-around-medoids algorithm described in\cr
#' Schubert, E. and Rousseeuw, P.J.: "Fast and eager k-medoids clustering: O(k) runtime improvement of the PAM, CLARA, and CLARANS algorithms."\cr
#' Information Systems, vol. 101, p. 101804, 2021.\cr
#' doi: https://doi.org/10.1016/j.is.2021.101804\cr
#' Notice that the actual values of the vectors (instances) are not needed. To recover them, look at the data matrix
#' used to generate the distance matrix.\cr
#' The number of instances, N, is not passed since dissimilarity matrix is NxN and therefore its size indicates the N value.
#'
#' With respect to the returned value, L$med has as many components\cr
#' as requested medoids and L$clasif has as many components as instances.\cr
#' Medoids are expressed in L$med by its number in the array of points (row in the dissimilarity matrix) starting at 1 (R convention).\cr
#' L$clasif contains the number of the medoid (i.e.: the cluster) to which each instance has been assigned, according to their order in\cr
#' L$med (also from 1).\cr
#' This means that if L$clasif[p] is m, the point p belongs to the\cr
#' class grouped around medoid L$med[m].\cr
#' Moreover, if the dissimilarity matrix contains as metadata\cr
#' (row names) the point names, the returned vector is a R-named vector with such names.
#'
#' @param dissim_file A string with the name of the binary file that contains the symmetric matrix of dissimilarities. Such matrix
#' should have been generated by CalcAndWriteDissimilarityMatrix and it must be a symmetric matrix.
#' @param k A positive integer (the desired number of medoids).
#' @param init_method One of the strings 'PREV', 'BUILD' or 'LAB'. See meaning of initialization algorithms BUILD and LAB in the original paper.\cr
#' 'PREV' should be used exclusively to start the second part of the algorithm (optimization) from a initial set of medoids generated by a former call.\cr
#' Default: BUILD.
#' @param initial_med A vector with initial medoids to start optimization. It is to be used only by the 'PREV' method and it will have been obtained as the first
#' element (L$med) of the two-element list returned by a previous call to this function used in just-initialize mode (max_iter=0).\cr
#' Default: empty vector.
#' @param max_iter The maximum number of allowed iterations. 0 means stop immediately after finding initial medoids.\cr
#' Default: 1000
#' @param nthreads The number of used threads.\cr
#' -1 means don't use threads (serial implementation).\cr
#' 0 means let the program choose according to the number of cores and of points.\cr
#' Any other number forces this number of threads. Choosing more than the number of available cores is allowed, but discouraged.\cr
#' Default: 0
#' @return L["med","clasif"] A list of two numeric vectors. See section Details for more information\cr
#' @examples
#' # Synthetic problem: 10 random seeds with coordinates in [0..20]
#' # to which random values in [-0.1..0.1] are added
#' M<-matrix(0,100,500)
#' rownames(M)<-paste0("rn",c(1:100))
#' for (i in (1:10))
#' {
#' p<-20*runif(500)
#' Rf <- matrix(0.2*(runif(5000)-0.5),nrow=10)
#' for (k in (1:10))
#' {
#' M[10*(i-1)+k,]=p+Rf[k,]
#' }
#' }
#' tmpfile1=paste0(tempdir(),"/pamtest.bin")
#' JWriteBin(M,tmpfile1,dtype="float",dmtype="full")
#' tmpdisfile1=paste0(tempdir(),"/pamDL2.bin")
#' CalcAndWriteDissimilarityMatrix(tmpfile1,tmpdisfile1,distype="L2",restype="float",nthreads=0)
#' L <- ApplyPAM(tmpdisfile1,10,init_method="BUILD")
#' # Final value of sum of distances to closest medoid
#' GetTD(L,tmpdisfile1)
#' # Medoids:
#' L$med
#' # Medoid in which each individual has been classified
#' n<-names(L$med)
#' n[L$clasif]
#' @export
ApplyPAM <- function(dissim_file,
                     k,
                     init_method = "BUILD",
                     initial_med = NULL,
                     max_iter = 1000L,
                     nthreads = 0L) {
  # Delegate to the native routine referenced by `_parallelpam_ApplyPAM`,
  # passing the arguments in declaration order, and return its value visibly.
  clustering <- .Call(
    `_parallelpam_ApplyPAM`,
    dissim_file, k, init_method, initial_med, max_iter, nthreads
  )
  clustering
}
#' GetTD
#'
#' Function that takes a PAM classification (as returned by ApplyPAM) and the dissimilarity matrix and returns the value of the TD function
#' (sum of dissimilarities between each point and its closest medoid, divided by the number of points).
#' This function is mainly for debugging/internal use.
#'
#' @param L A list of two numeric vectors, L["med","clasif"], as returned by ApplyPAM (please, consult the help of ApplyPAM for details)
#' @param dissim_file A string with the name of the binary file that contains the symmetric matrix of dissimilarities. Such matrix
#' should have been generated by CalcAndWriteDissimilarityMatrix.
#' @return TD The value of the TD function.
#' @examples
#' # Synthetic problem: 10 random seeds with coordinates in [0..20]
#' # to which random values in [-0.1..0.1] are added
#' M<-matrix(0,100,500)
#' rownames(M)<-paste0("rn",c(1:100))
#' for (i in (1:10))
#' {
#' p<-20*runif(500)
#' Rf <- matrix(0.2*(runif(5000)-0.5),nrow=10)
#' for (k in (1:10))
#' {
#' M[10*(i-1)+k,]=p+Rf[k,]
#' }
#' }
#' tmpfile1=paste0(tempdir(),"/pamtest.bin")
#' tmpdisfile1=paste0(tempdir(),"/pamDL2.bin")
#' JWriteBin(M,tmpfile1,dtype="float",dmtype="full")
#' CalcAndWriteDissimilarityMatrix(tmpfile1,tmpdisfile1,distype="L2",restype="float",nthreads=0)
#' L <- ApplyPAM(tmpdisfile1,10,init_method="BUILD")
#' # Final value of sum of distances to closest medoid
#' GetTD(L,tmpdisfile1)
#' @export
GetTD <- function(L,
                  dissim_file) {
  # Forward both arguments to the native routine referenced by
  # `_parallelpam_GetTD` and return its value visibly.
  td_value <- .Call(`_parallelpam_GetTD`, L, dissim_file)
  td_value
}
#' GetJCol
#'
#' Returns (as a R numeric vector) the requested column number from the matrix contained in a jmatrix binary file
#'
#' @param fname String with the file name that contains the binary data.
#' @param ncol The number of the column to be returned, in R-numbering (from 1)
#' @return A numeric vector with the values of elements in the requested column
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' Rf[,3]
#' vf<-GetJCol(tmpfile1,3)
#' vf
#' @export
GetJCol <- function(fname,
                    ncol) {
  # Forward the file name and column number to the native routine referenced
  # by `_parallelpam_GetJCol` and return its value visibly.
  column_values <- .Call(`_parallelpam_GetJCol`, fname, ncol)
  column_values
}
#' GetJManyCols
#'
#' Returns (as a R numeric matrix) the columns with the requested column numbers from the matrix contained in a jmatrix binary file
#'
#' @param fname String with the file name that contains the binary data.
#' @param extcols A numeric vector with the indexes of the columns to be extracted, in R-numbering (from 1)
#' @return A numeric matrix with the values of elements in the requested columns
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' vc<-GetJManyCols(tmpfile1,c(1,4))
#' vc
#' @export
GetJManyCols <- function(fname,
                         extcols) {
  # Forward the file name and the column indexes to the native routine
  # referenced by `_parallelpam_GetJManyCols` and return its value visibly.
  selected_columns <- .Call(`_parallelpam_GetJManyCols`, fname, extcols)
  selected_columns
}
#' GetJColByName
#'
#' Returns (as a R numeric vector) the requested named column from the matrix contained in a jmatrix binary file
#'
#' @param fname String with the file name that contains the binary data.
#' @param colname The name of the column to be returned. If the matrix has no column names, or the name is not found, an empty vector is returned
#' @return A numeric vector with the values of elements in the requested column
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' Rf[,"c"]
#' vf<-GetJColByName(tmpfile1,"c")
#' vf
#' @export
GetJColByName <- function(fname,
                          colname) {
  # Forward the file name and column name to the native routine referenced
  # by `_parallelpam_GetJColByName` and return its value visibly.
  column_values <- .Call(`_parallelpam_GetJColByName`, fname, colname)
  column_values
}
#' GetJManyColsByNames
#'
#' Returns (as a R numeric matrix) the columns with the requested column names from the matrix contained in a jmatrix binary file
#'
#' @param fname String with the file name that contains the binary data.
#' @param extcolnames A vector of RStrings with the names of the columns to be extracted. If the binary file has no column names, or _any_ of the column names is not present, an empty matrix is returned.
#' @return A numeric matrix with the values of elements in the requested columns
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' Rf[,c(1,4)]
#' vf<-GetJManyColsByNames(tmpfile1,c("a","d"))
#' vf
#' @export
GetJManyColsByNames <- function(fname,
                                extcolnames) {
  # Forward the file name and the column names to the native routine
  # referenced by `_parallelpam_GetJManyColsByNames` and return its value
  # visibly.
  selected_columns <- .Call(`_parallelpam_GetJManyColsByNames`, fname, extcolnames)
  selected_columns
}
#' GetSubdiag
#'
#' Takes a symmetric matrix and returns a vector with all its elements under the main diagonal (without those at the diagonal itself).
#' Done as an instrumental function to check the PAM in package cluster. To be removed in final version of the package.
#'
#' @param fname The name of the file with the dissimilarity matrix in jmatrix binary format.
#' @return The vector with the values under the main diagonal, sorted by columns (i.e.: m(2,1) .. m(n,1), m(3,2)..m(n,2),..., m(n,n-1))
#' @examples
#' Rns <- matrix(runif(49),nrow=7)
#' Rsym <- 0.5*(Rns+t(Rns))
#' rownames(Rsym) <- c("A","B","C","D","E","F","G")
#' colnames(Rsym) <- c("a","b","c","d","e","f","g")
#' tmpfile1=paste0(tempdir(),"/Rsymfloat.bin")
#' JWriteBin(Rsym,tmpfile1,dtype="float",dmtype="symmetric")
#' d<-GetSubdiag(tmpfile1)
#' Rsym
#' d
#' @export
GetSubdiag <- function(fname) {
  # Forward the file name to the native routine referenced by
  # `_parallelpam_GetSubdiag` and return its value visibly.
  subdiagonal <- .Call(`_parallelpam_GetSubdiag`, fname)
  subdiagonal
}
#' GetJRow
#'
#' Returns (as an R numeric vector) the requested row number from the matrix contained in a jmatrix binary file.
#'
#' @param fname String with the file name that contains the binary data.
#' @param nrow The number of the row to be returned, in R-numbering (from 1)
#' @return A numeric vector with the values of the elements in the requested row
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' Rf[3,]
#' vf<-GetJRow(tmpfile1,3)
#' vf
#' @export
GetJRow <- function(fname, nrow) {
# Forwards both arguments unchanged to the native symbol `_parallelpam_GetJRow`.
.Call(`_parallelpam_GetJRow`, fname, nrow)
}
#' GetJManyRows
#'
#' Returns (as an R numeric matrix) the rows with the requested row numbers from the matrix contained in a jmatrix binary file.
#'
#' @param fname String with the file name that contains the binary data.
#' @param extrows A numeric vector with the indices of the rows to be extracted, in R-numbering (from 1)
#' @return A numeric matrix with the values of the elements in the requested rows
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' Rf[c(1,4),]
#' vc<-GetJManyRows(tmpfile1,c(1,4))
#' vc
#' @export
GetJManyRows <- function(fname, extrows) {
# Forwards both arguments unchanged to the native symbol `_parallelpam_GetJManyRows`.
.Call(`_parallelpam_GetJManyRows`, fname, extrows)
}
#' GetJRowByName
#'
#' Returns (as an R numeric vector) the requested named row from the matrix contained in a jmatrix binary file.
#'
#' @param fname String with the file name that contains the binary data.
#' @param rowname The name of the row to be returned. If the matrix has no row names, or the name is not found, an empty vector is returned.
#' @return A numeric vector with the values of the elements in the requested row
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' Rf["C",]
#' vf<-GetJRowByName(tmpfile1,"C")
#' vf
#' @export
GetJRowByName <- function(fname, rowname) {
# Forwards both arguments unchanged to the native symbol `_parallelpam_GetJRowByName`.
.Call(`_parallelpam_GetJRowByName`, fname, rowname)
}
#' GetJManyRowsByNames
#'
#' Returns (as an R numeric matrix) the rows with the requested row names from the matrix contained in a jmatrix binary file.
#'
#' @param fname String with the file name that contains the binary data.
#' @param extrownames A vector of R strings with the names of the rows to be extracted. If the binary file has no row names, or _any_ of the row names is not present, an empty matrix is returned.
#' @return A numeric matrix with the values of the elements in the requested rows
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' Rf[c("A","C"),]
#' vf<-GetJManyRowsByNames(tmpfile1,c("A","C"))
#' vf
#' @export
GetJManyRowsByNames <- function(fname, extrownames) {
# Forwards both arguments unchanged to the native symbol `_parallelpam_GetJManyRowsByNames`.
.Call(`_parallelpam_GetJManyRowsByNames`, fname, extrownames)
}
#' JMatInfo
#'
#' Shows on the screen or writes to a file information about a matrix stored in the binary format of package jmatrix.
#'
#' @param fname String with the file name that contains the binary data.
#' @param fres String with the name of the file to write the information to. Default: "" (information is written to the console)
#' @return No return value, called for its side effects (writes on screen or creates a file)
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' JMatInfo(tmpfile1)
#' @export
JMatInfo <- function(fname, fres = "") {
# Forwards to the native symbol `_parallelpam_JMatInfo`; invisible() keeps the result of the call from being auto-printed.
invisible(.Call(`_parallelpam_JMatInfo`, fname, fres))
}
#' GetJRowNames
#'
#' Returns an R StringVector with the row names of a matrix stored in the binary format of package jmatrix, if it has them stored.
#'
#' @param fname String with the file name that contains the binary data.
#' @return An R StringVector with the row names, or the empty vector if the binary file has no row names as metadata.
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' rn<-GetJRowNames(tmpfile1)
#' rn
#' @export
GetJRowNames <- function(fname) {
# Forwards the file name unchanged to the native symbol `_parallelpam_GetJRowNames`.
.Call(`_parallelpam_GetJRowNames`, fname)
}
#' GetJColNames
#'
#' Returns an R StringVector with the column names of a matrix stored in the binary format of package jmatrix, if it has them stored.
#'
#' @param fname String with the file name that contains the binary data.
#' @return An R StringVector with the column names, or the empty vector if the binary file has no column names as metadata.
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' cn<-GetJColNames(tmpfile1)
#' cn
#' @export
GetJColNames <- function(fname) {
# Forwards the file name unchanged to the native symbol `_parallelpam_GetJColNames`.
.Call(`_parallelpam_GetJColNames`, fname)
}
#' GetJNames
#'
#' Returns an R list of two elements, rownames and colnames, each of them being an R StringVector with the corresponding names.
#'
#' @param fname String with the file name that contains the binary data.
#' @return N["rownames","colnames"]: A list with two elements named rownames and colnames which are R StringVectors.
#' If the binary file has no row or column names as metadata, BOTH will be returned as empty vectors, even if one of them exists.
#' If you want to extract only one, use either GetJRowNames or GetJColNames, as appropriate.
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' N<-GetJNames(tmpfile1)
#' N["rownames"]
#' N["colnames"]
#' @export
GetJNames <- function(fname) {
# Forwards the file name unchanged to the native symbol `_parallelpam_GetJNames`.
.Call(`_parallelpam_GetJNames`, fname)
}
#' JWriteBin
#'
#' Writes an R matrix to a disk file as a binary matrix in the jmatrix format
#'
#'
#' Use this function cautiously. Unlike the functions to get one or more rows or columns from the binary file,
#' which allocate only the memory strictly needed for the vector/matrix and do not load the whole binary file in memory,
#' this function allocates the full matrix in the requested data type and writes it later, so with very big matrices
#' you might run out of memory.\cr
#' Type 'int' is really long int (8-bytes in most modern machines) so using 'int' or 'long' is equivalent.\cr
#' Type is coerced from double (the internal type of R matrices) to the requested type, which may cause a loss of precision.\cr
#' If M is a named R matrix, row and column names are written as metadata, too.\cr
#' Also, if you write as symmetric a matrix which is not such, only the lower-diagonal part will be written.
#' The rest of the data will be lost. In this case, if the matrix has row and column names, only row names are written.
#'
#' @param M The R matrix to be written
#' @param fname The name of the file to write
#' @param dtype The data type of the matrix to be written: one of the strings 'short', 'int', 'long', 'float' or 'double'. Default: 'float'
#' @param dmtype The matrix type: one of the strings 'full', 'sparse' or 'symmetric'. Default: 'full'
#' @param comment An optional string with the comment to be added as metadata. Default: "" (empty string, no added comment)
#' @return No return value, called for side effects (creates a file)
#' @examples
#' Rf <- matrix(runif(48),nrow=6)
#' rownames(Rf) <- c("A","B","C","D","E","F")
#' colnames(Rf) <- c("a","b","c","d","e","f","g","h")
#' tmpfile1=paste0(tempdir(),"/Rfullfloat.bin")
#' JWriteBin(Rf,tmpfile1,dtype="float",dmtype="full",comment="Full matrix of floats")
#' @export
JWriteBin <- function(M, fname, dtype = "float", dmtype = "full", comment = "") {
# Forwards to the native symbol `_parallelpam_JWriteBin`; invisible() keeps the result of the call from being auto-printed.
invisible(.Call(`_parallelpam_JWriteBin`, M, fname, dtype, dmtype, comment))
}
#' CalculateSilhouette
#'
#' Calculates the silhouette of each point of those classified by a clustering algorithm.
#'
#'
#' @param cl The array of classification with the number of the class to which each point belongs. This number must be in 1..number_of_classes.\cr
#' This function takes something like the L$clasif array which is the second element of the list returned by ApplyPAM
#' @param fdist The binary file containing the symmetric matrix with the dissimilarities between points (usually, generated by a call to CalcAndWriteDissimilarityMatrix)
#' @param nthreads The number of threads used for parallel calculation.\cr
#' -1 means don't use threads (serial implementation).\cr
#' 0 means let the program choose according to the number of cores and of points.\cr
#' Any other number forces this number of threads. Choosing more than the number of available cores is allowed, but discouraged.\cr
#' Default: 0
#' @return sil Numeric vector with the values of the silhouette for each point, in the same order in which points are in cl.\cr
#' If cl is a named vector sil will be a named vector, too, with the same names.
#' @examples
#' # Synthetic problem: 10 random seeds with coordinates in [0..20]
#' # to which random values in [-0.1..0.1] are added
#' M<-matrix(0,100,500)
#' rownames(M)<-paste0("rn",c(1:100))
#' for (i in (1:10))
#' {
#' p<-20*runif(500)
#' Rf <- matrix(0.2*(runif(5000)-0.5),nrow=10)
#' for (k in (1:10))
#' {
#' M[10*(i-1)+k,]=p+Rf[k,]
#' }
#' }
#' tmpfile1=paste0(tempdir(),"/pamtest.bin")
#' JWriteBin(M,tmpfile1,dtype="float",dmtype="full")
#' tmpdisfile1=paste0(tempdir(),"/pamDL2.bin")
#' CalcAndWriteDissimilarityMatrix(tmpfile1,tmpdisfile1,distype="L2",restype="float",nthreads=0)
#' L <- ApplyPAM(tmpdisfile1,10,init_method="BUILD")
#' sil <- CalculateSilhouette(L$clasif,tmpdisfile1)
#' # Histogram of the silhouette. In this synthetic problem, almost 1 for all points
#' hist(sil)
#' @export
CalculateSilhouette <- function(cl, fdist, nthreads = 0L) {
# Forwards all three arguments unchanged to the native symbol `_parallelpam_CalculateSilhouette`.
.Call(`_parallelpam_CalculateSilhouette`, cl, fdist, nthreads)
}
#' NumSilToClusterSil
#'
#' Takes a silhouette in the form of a NumericVector, as returned by CalculateSilhouette, and returns it as a numeric matrix appropriate to be plotted by the package 'cluster'
#'
#' @param cl The array of classification with the number of the class to which each point belongs. This number must be in 1..number_of_classes.\cr
#' This function takes something like the L$clasif array which is the second element of the list returned by ApplyPAM
#' @param s The numeric value of the silhouette for each point, with points in the same order as they appear in cl.\cr
#' This is the vector returned by a call to CalculateSilhouette with the same value of parameter cl.
#' @return sp A silhouette in the format of the cluster package which is a NumericMatrix with as many rows as points and three columns: cluster, neighbor and sil_width.\cr
#' Its structure and dimension names are as in package 'cluster', which allows it to be used with the silhouette plotting functions of that package.\cr
#' This means you can do library(cluster) followed by plot(NumSilToClusterSil(cl,s)) to get a beautiful plot.
#' @examples
#' # Synthetic problem: 10 random seeds with coordinates in [0..20]
#' # to which random values in [-0.1..0.1] are added
#' M<-matrix(0,100,500)
#' rownames(M)<-paste0("rn",c(1:100))
#' for (i in (1:10))
#' {
#' p<-20*runif(500)
#' Rf <- matrix(0.2*(runif(5000)-0.5),nrow=10)
#' for (k in (1:10))
#' {
#' M[10*(i-1)+k,]=p+Rf[k,]
#' }
#' }
#' tmpfile1=paste0(tempdir(),"/pamtest.bin")
#' JWriteBin(M,tmpfile1,dtype="float",dmtype="full")
#' tmpdisfile1=paste0(tempdir(),"/pamDL2.bin")
#' CalcAndWriteDissimilarityMatrix(tmpfile1,tmpdisfile1,distype="L2",restype="float",nthreads=0)
#' L <- ApplyPAM(tmpdisfile1,10,init_method="BUILD")
#' sil <- CalculateSilhouette(L$clasif,tmpdisfile1)
#' sp <- NumSilToClusterSil(L$clasif,sil)
#' library(cluster)
#' plot(sp)
#' @export
NumSilToClusterSil <- function(cl, s) {
# Forwards both arguments unchanged to the native symbol `_parallelpam_NumSilToClusterSil`.
.Call(`_parallelpam_NumSilToClusterSil`, cl, s)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.