Join {str2str} | R Documentation |
Join (or Merge) a List of Data-frames
Description
Join merges a list of data.frames into a single data.frame. It is a
looped version of plyr::join that allows you to merge more than two
data.frames in the same function call. It differs from plyr::join_all
in that it allows you to join by the row.names.
Usage
Join(
data.list,
by,
type = "full",
match = "all",
rownamesAsColumn = FALSE,
rtn.rownames.nm = "row_names"
)
Arguments
data.list |
list of data.frames of data. |
by |
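character vector specifying which colnames to merge by. As the examples below show, the single value "0" joins by the row.names instead of by a column. |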
type |
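character vector of length 1 specifying the type of merge. Options
are the following: 1. "full" = all rows from any of the data.frames in
data.list, 2. "left" = only rows included in the first data.frame,
3. "right" = only rows included in the last data.frame, 4. "inner" = only
rows included in all of the data.frames (compare the "types of joins"
examples below).
|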
match |
character vector of length 1 specifying whether merged elements should
be repeated in each row of the return object when duplicate values exist on the
|
rownamesAsColumn |
logical vector of length 1 specifying whether the original
rownames in |
rtn.rownames.nm |
character vector of length 1 specifying what the name of the rownames
column should be in the return object. The |
Details
Join is a polished rendition of Reduce(f = plyr::join, x = data.list).
A future version of the function might allow for the init and right
arguments from Reduce.
Value
data.frame of all uniquely colnamed columns from data.list, with
the rows included specified by type and the rownames specified by
rownamesAsColumn. Similar to plyr::join, Join returns the rows in the
same order as they appeared in data.list.
See Also
Examples
# by column
# Build three data.frames that share an "id" key column holding the mtcars row names.
mtcars1 <- mtcars
mtcars1$"id" <- row.names(mtcars)
mtcars2 <- data.frame("id" = mtcars1$"id", "forward" = 1:32)
mtcars3 <- data.frame("id" = mtcars1$"id", "backward" = 32:1)
mtcars_list <- list(mtcars1, mtcars2, mtcars3)
# join all three on the shared "id" column (type defaults to "full")
by_column <- Join(data.list = mtcars_list, by = "id")
# same join, this time with rownamesAsColumn = TRUE
by_column2 <- Join(data.list = mtcars_list, by = "id", rownamesAsColumn = TRUE)
# NOTE(review): by = NULL presumably falls back to the columns the data.frames
# have in common, as plyr::join does -- confirm against the Join implementation
by_column3 <- Join(data.list = mtcars_list, by = NULL)
# by rownames
# Same data as the "by column" example, but the key is carried by the row names
# rather than by an "id" column.
mtcars1 <- mtcars
mtcars2 <- data.frame("forward" = 1:32, row.names = row.names(mtcars))
mtcars3 <- data.frame("backward" = 32:1, row.names = row.names(mtcars))
# by = "0" asks Join to merge on the row names
by_rownm <- Join(data.list = list(mtcars1, mtcars2, mtcars3), by = "0")
by_rownm2 <- Join(data.list = list(mtcars1, mtcars2, mtcars3), by = "0",
rownamesAsColumn = TRUE)
# The comparisons below reuse by_column and by_column2 from the "by column"
# example; once the "id" column is dropped, both approaches are compared for
# exact equality.
identical(x = by_column[names(by_column) != "id"],
y = by_rownm) # same as converting rownames to a column in the data
identical(x = by_column2[names(by_column2) != "id"],
y = by_rownm2) # same as converting rownames to a column in the data
# inserted NAs (by columns)
# mtcars1: first four columns of mtcars
mtcars1 <- mtcars[1:4]
# mtcars2: mean-centered copies of every column except the first, suffixed "_c"
mtcars2 <- setNames(obj = as.data.frame(scale(x = mtcars1[-1],
center = TRUE, scale = FALSE)), nm = paste0(names(mtcars1[-1]), "_c"))
# mtcars3: scaled (but not centered) copies of the same columns, suffixed "_s"
mtcars3 <- setNames(obj = as.data.frame(scale(x = mtcars1[-1],
center = FALSE, scale = TRUE)), nm = paste0(names(mtcars1[-1]), "_s"))
# Keep a different random sample of 10 rows from each data.frame so the key sets
# only partially overlap. No seed is set, so the sampled rows differ between runs.
tmp <- lapply(X = list(mtcars1, mtcars2, mtcars3), FUN = function(dat)
dat[sample(x = row.names(dat), size = 10), ])
# NOTE(review): reshape::namerows presumably copies the row names into an "id"
# column (the joins below use by = "id") -- confirm against the reshape docs
mtcars_list <- lapply(X = tmp, FUN = reshape::namerows)
by_column_NA <- Join(data.list = mtcars_list, by = "id") # join by the "id" column
by_column_NA2 <- Join(data.list = mtcars_list, by = "id", rownamesAsColumn = TRUE)
# compare the result's row names with the union of the input row names
identical(x = row.names(by_column_NA), # rownames from any data.frame are retained
y = Reduce(f = union, x = lapply(X = mtcars_list, FUN = row.names)))
# inserted NAs (by rownames)
# mtcars1: first four columns of mtcars
mtcars1 <- mtcars[1:4]
# mtcars2: mean-centered copies of all four columns, suffixed "_c"
mtcars2 <- setNames(obj = as.data.frame(scale(x = mtcars1, center = TRUE, scale = FALSE)),
nm = paste0(names(mtcars1), "_c"))
# mtcars3: scaled (but not centered) copies of all four columns, suffixed "_s"
mtcars3 <- setNames(obj = as.data.frame(scale(x = mtcars1, center = FALSE, scale = TRUE)),
nm = paste0(names(mtcars1), "_s"))
# Keep a different random sample of 10 rows from each data.frame so the row names
# only partially overlap. No seed is set, so the sampled rows differ between runs.
mtcars_list <- lapply(X = list(mtcars1, mtcars2, mtcars3), FUN = function(dat)
dat[sample(x = row.names(dat), size = 10), ])
by_rownm_NA <- Join(data.list = mtcars_list, by = "0") # join by row.names
by_rownm_NA2 <- Join(data.list = mtcars_list, by = "0", rownamesAsColumn = TRUE)
# compare the result's row names with the union of the input row names
identical(x = row.names(by_rownm_NA), # rownames from any data.frame are retained
y = Reduce(f = union, x = lapply(X = mtcars_list, FUN = row.names)))
# types of joins
# These calls reuse mtcars_list from the "inserted NAs (by rownames)" example:
# three randomly sampled 10-row data.frames joined on their row names.
Join(data.list = mtcars_list, by = "0", type = "left") # only rows included in mtcars1
Join(data.list = mtcars_list, by = "0", type = "right") # only rows included in mtcars3
Join(data.list = mtcars_list, by = "0", type = "inner") # only rows included in
# all 3 data.frames (might be empty due to random chance from sample() call)
# errors returned
# Each call below is deliberately invalid. str2str::try_expr wraps the call and
# the resulting error message is read from the "error" element of its return value.

# data.list contains elements that are not data.frames (two matrices)
# NOTE(review): `by` is omitted here although Usage shows no default for it;
# presumably the data.list check fires before `by` is evaluated -- confirm
tmp <- str2str::try_expr(
Join(data.list = list(mtcars, as.matrix(mtcars), as.matrix(mtcars)))
)
print(tmp[["error"]]) # "The elements with the following positions in
# `data.list` are not data.frames: 2 , 3"

# `by` given as the number 0 rather than the character string "0"
tmp <- str2str::try_expr(
Join(data.list = replicate(n = 3, mtcars, simplify = FALSE), by = 0)
)
print(tmp[["error"]]) # "Assertion on 'by' failed: Must be of type
# 'character' (or 'NULL'), not 'double'."

# "0" (row names) combined with a column name in `by`
tmp <- str2str::try_expr(
Join(data.list = replicate(n = 3, mtcars, simplify = FALSE), by = c("0","mpg"))
)
print(tmp[["error"]]) # "If '0' is a value in `by`, then it must be the
# only value and `by` must be length 1."

# `by` column ("mpg") is absent from the first two data.frames (attitude)
tmp <- str2str::try_expr(
Join(data.list = list(attitude, attitude, mtcars), by = "mpg")
)
print(tmp[["error"]]) # "The data.frames associated with the following positions in
# `data.list` do not contain the `by` columns: 1 , 2"