cmap {disk.frame} | R Documentation |
Apply the same function to all chunks
Description
Apply the same function to all chunks
'cimap.disk.frame' accepts a two-argument function, where the first argument is a data.frame and the second is the chunk ID
'lazy' is a convenience function that applies '.f' to every chunk
'delayed' is an alias for lazy and is consistent with the naming in Dask and Dagger.jl
Usage
cmap(.x, .f, ...)
## S3 method for class 'disk.frame'
cmap(.x, .f, ...)
cmap_dfr(.x, .f, ..., .id = NULL)
## S3 method for class 'disk.frame'
cmap_dfr(.x, .f, ..., .id = NULL, use.names = fill, fill = FALSE, idcol = NULL)
cimap(.x, .f, ...)
## S3 method for class 'disk.frame'
cimap(
.x,
.f,
outdir = NULL,
keep = NULL,
lazy = TRUE,
overwrite = FALSE,
compress = 50,
...
)
cimap_dfr(.x, .f, ..., .id = NULL)
## S3 method for class 'disk.frame'
cimap_dfr(
.x,
.f,
...,
.id = NULL,
use.names = fill,
fill = FALSE,
idcol = NULL
)
lazy(.x, .f, ...)
## S3 method for class 'disk.frame'
lazy(.x, .f, ...)
delayed(.x, .f, ...)
clapply(...)
Arguments
.x |
a disk.frame |
.f |
a function to apply to each of the chunks |
... |
Passed to 'collect' and 'write_disk.frame' |
.id |
ignored |
use.names |
for cmap_dfr's call to data.table::rbindlist. See data.table::rbindlist |
fill |
for cmap_dfr's call to data.table::rbindlist. See data.table::rbindlist |
idcol |
for cmap_dfr's call to data.table::rbindlist. See data.table::rbindlist |
outdir |
the output directory |
keep |
The columns to keep at source |
lazy |
if TRUE, apply '.f' lazily; the result is materialised later (e.g. by calling 'collect') |
overwrite |
Whether to overwrite any files in the output directory |
compress |
The compression setting, a number from 0 to 100 |
Examples
# build a disk.frame from the built-in `cars` dataset
cars.df = as.disk.frame(cars)

# return the first row of each chunk lazily;
# the result is materialised by collect()
cars2 = cmap(cars.df, function(chunk) {
  chunk[1,]
})
collect(cars2)

# same as above but using a purrr-style formula
cars2 = cmap(cars.df, ~.x[1,])
collect(cars2)

# return the first row of each chunk eagerly as a list
cmap(cars.df, ~.x[1,], lazy = FALSE)

# return the first row of each chunk eagerly as a data.table/data.frame by row-binding
cmap_dfr(cars.df, ~.x[1,])

# lazy and delayed are just aliases for cmap(..., lazy = TRUE)
collect(lazy(cars.df, ~.x[1,]))
collect(delayed(cars.df, ~.x[1,]))

# clean up cars.df
delete(cars.df)