Description Usage Arguments Details Examples
Implementation of merge for Matrix objects. When merge.Matrix is called explicitly, it also works for matrix, data.frame, and vector objects, as a much faster alternative to the built-in merge.
## S3 method for class 'Matrix'
merge(
x,
y,
by.x,
by.y,
all.x = TRUE,
all.y = TRUE,
out.class = class(x)[1],
fill.x = ifelse(is(x, "sparseMatrix"), FALSE, NA),
fill.y = fill.x,
...
)
join.Matrix(
x,
y,
by.x,
by.y,
all.x = TRUE,
all.y = TRUE,
out.class = class(x)[1],
fill.x = ifelse(is(x, "sparseMatrix"), FALSE, NA),
fill.y = fill.x,
...
)
|
x, y |
Matrix or matrix-like object (Matrix, matrix, data.frame, or vector) |
by.x |
vector indicating the names to match from x |
by.y |
vector indicating the names to match from y |
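all.x |
logical; if TRUE, each value in x will be included in the output even if it has no matching value in y |
all.y |
logical; if TRUE, each value in y will be included in the output even if it has no matching value in x |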
out.class |
the class of the output object. Defaults to the class of x. Note that some output classes are not possible due to R coercion capabilities, such as converting a character matrix to a Matrix. |
fill.x, fill.y |
the value to put in merged columns where there is no match. Defaults to 0/FALSE for sparse matrices in order to preserve sparsity, NA for all other classes |
... |
arguments to be passed to or from methods. Currently ignored |
all.x/all.y correspond to the four types of database joins in the following way:
left join: all.x=TRUE, all.y=FALSE
right join: all.x=FALSE, all.y=TRUE
inner join: all.x=FALSE, all.y=FALSE
full join: all.x=TRUE, all.y=TRUE
Note that NA values will match other NA values.
orders<-Matrix(as.matrix(data.frame(orderNum=1:1000,
customer=sample(100,1000,TRUE))))
cancelledOrders<-Matrix(as.matrix(data.frame(orderNum=sample(1000,100),
cancelled=1)))
skus<-Matrix(as.matrix(data.frame(orderNum=sample(1000,10000,TRUE),
sku=sample(1000,10000,TRUE), amount=runif(10000))))
a<-merge(orders,cancelledOrders,orders[,'orderNum'],cancelledOrders[,'orderNum'])
b<-merge(orders,cancelledOrders,orders[,'orderNum'],cancelledOrders[,'orderNum'],all.x=FALSE)
c<-merge(orders,skus,orders[,'orderNum'],skus[,'orderNum'])
#The above Matrices could be converted to matrices or data.frames and handled in other methods.
#However, this is not possible in the sparse case, which can be handled by this function:
sm<-cbind2(1:200000,rsparsematrix(200000,10000,density=.0001))
sm2<-cbind2(sample(1:200000,50000,TRUE),rsparsematrix(200000,10,density=.01))
sm3<-merge.Matrix(sm,sm2,by.x=sm[,1],by.y=sm2[,1])
## Not run:
#merge.Matrix can also handle many other data types, such as data frames, and is generally fast.
orders<-data.frame(orderNum=as.character(sample(1e5, 1e6, TRUE)),
sku=sample(1e3, 1e6, TRUE),
customer=sample(1e4,1e6,TRUE),stringsAsFactors=FALSE)
cancelledOrders<-data.frame(orderNum=as.character(sample(1e5,1e4)),
cancelled=1,stringsAsFactors=FALSE)
system.time(a<-merge.Matrix(orders,cancelledOrders,orders[,'orderNum'],
cancelledOrders[,'orderNum']))
system.time(b<-merge.data.frame(orders,cancelledOrders,all.x = TRUE,all.y=TRUE))
system.time(c<-dplyr::full_join(orders,cancelledOrders))
system.time({require(data.table);
d<-merge(data.table(orders),data.table(cancelledOrders),
by='orderNum',all=TRUE,allow.cartesian=TRUE)})
orders<-data.frame(orderNum=sample(1e5, 1e6, TRUE), sku=sample(1e3, 1e6,
TRUE), customer=sample(1e4,1e6,TRUE),stringsAsFactors=FALSE)
cancelledOrders<-data.frame(orderNum=sample(1e5,1e4),cancelled=1,stringsAsFactors=FALSE)
system.time(b<-merge.Matrix(orders,cancelledOrders,orders[,'orderNum'],
cancelledOrders[,'orderNum']))
system.time(e<-dplyr::full_join(orders,cancelledOrders))
system.time({require(data.table);
d<-merge(data.table(orders),data.table(cancelledOrders),
by='orderNum',all=TRUE,allow.cartesian=TRUE)})
#In certain cases, merge.Matrix can be much faster than alternatives.
one<-as.character(1:1000000)
two<-as.character(sample(1:1000000,1e5,TRUE))
system.time(b<-merge.Matrix(one,two,one,two))
system.time(c<-dplyr::full_join(data.frame(key=one),data.frame(key=two)))
system.time({require(data.table);
d<-merge(data.table(data.frame(key=one)),data.table(data.frame(key=two)),
by='key',all=TRUE,allow.cartesian=TRUE)})
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.