| Join {str2str} | R Documentation |
Join (or Merge) a List of Data-frames
Description
Join merges a list of data.frames into a single data.frame. It is a
looped version of plyr::join that allows you to merge more than 2
data.frames in the same function call. It is different from plyr::join_all
because it allows you to join by the row.names.
Usage
Join(
data.list,
by,
type = "full",
match = "all",
rownamesAsColumn = FALSE,
rtn.rownames.nm = "row_names"
)
Arguments
data.list |
list of data.frames to be merged. |
by |
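character vector specifying what colnames to merge data.list by. If by = "0", the data.frames are merged by their row.names; in that case "0" must be the only value and by must be length 1. NULL is also accepted. |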
type |
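character vector of length 1 specifying the type of merge. Options
are the following: 1. "full" = all rows from any of the data.frames in
data.list, 2. "left" = only rows from the first data.frame in data.list,
3. "right" = only rows from the last data.frame in data.list,
4. "inner" = only rows present in all of the data.frames in data.list. |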
match |
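character vector of length 1 specifying whether merged elements should
be repeated in each row of the return object when duplicate values exist on the
by columns. If "all" (the default), the merged elements are repeated in every row with duplicate values on the by columns; if "first", only the first matching row is used. See plyr::join. |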
rownamesAsColumn |
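logical vector of length 1 specifying whether the original
rownames in data.list should be a column in the return object (TRUE) or kept as the rownames of the return object (FALSE). |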
rtn.rownames.nm |
character vector of length 1 specifying what the name of the rownames
column should be in the return object. The rtn.rownames.nm argument is only used if rownamesAsColumn = TRUE. |
Details
Join is a polished rendition of Reduce(f = plyr::join, x = data.list).
A future version of the function might allow for the init and right
arguments from Reduce.
Value
data.frame of all uniquely colnamed columns from data.list with
the rows included specified by type and rownames specified by rownamesAsColumn.
Similar to plyr::join, Join returns the rows in the same order as
they appeared in data.list.
See Also
plyr::join, plyr::join_all
Examples
# by column
# Build three data.frames that share an "id" column holding the mtcars
# row.names, then merge them on that column.
mtcars1 <- mtcars
mtcars1$"id" <- row.names(mtcars)
mtcars2 <- data.frame("id" = mtcars1$"id", "forward" = 1:32)
mtcars3 <- data.frame("id" = mtcars1$"id", "backward" = 32:1)
mtcars_list <- list(mtcars1, mtcars2, mtcars3)
# default call: merge on the shared "id" column
by_column <- Join(data.list = mtcars_list, by = "id")
# same merge, but with rownamesAsColumn switched on
by_column2 <- Join(data.list = mtcars_list, by = "id", rownamesAsColumn = TRUE)
# `by = NULL` is accepted as well
# NOTE(review): presumably falls back to all common colnames, as plyr::join
# does for a NULL `by` -- confirm against the Join source
by_column3 <- Join(data.list = mtcars_list, by = NULL)
# by rownames
# Same data as the by-column example, except the key is carried in the
# row.names of each data.frame instead of in an "id" column.
mtcars1 <- mtcars
mtcars2 <- data.frame("forward" = 1:32, row.names = row.names(mtcars))
mtcars3 <- data.frame("backward" = 32:1, row.names = row.names(mtcars))
# by = "0" requests a merge on the row.names
by_rownm <- Join(data.list = list(mtcars1, mtcars2, mtcars3), by = "0")
by_rownm2 <- Join(data.list = list(mtcars1, mtcars2, mtcars3), by = "0",
rownamesAsColumn = TRUE)
# The comparisons below reuse `by_column` and `by_column2` created in the
# by-column example, with their "id" column dropped.
identical(x = by_column[names(by_column) != "id"],
y = by_rownm) # same as converting rownames to a column in the data
identical(x = by_column2[names(by_column2) != "id"],
y = by_rownm2) # same as converting rownames to a column in the data
# inserted NAs (by columns)
# mtcars1: first four columns of mtcars.
# mtcars2: columns 2-4 of mtcars1 mean-centered, colnames suffixed with "_c".
# mtcars3: columns 2-4 of mtcars1 scaled (not centered), colnames suffixed "_s".
mtcars1 <- mtcars[1:4]
mtcars2 <- setNames(obj = as.data.frame(scale(x = mtcars1[-1],
center = TRUE, scale = FALSE)), nm = paste0(names(mtcars1[-1]), "_c"))
mtcars3 <- setNames(obj = as.data.frame(scale(x = mtcars1[-1],
center = FALSE, scale = TRUE)), nm = paste0(names(mtcars1[-1]), "_s"))
# Keep 10 randomly chosen rows of each data.frame so the three row sets only
# partially overlap. sample() is not seeded here, so the selected rows (and
# therefore the results below) differ from run to run.
tmp <- lapply(X = list(mtcars1, mtcars2, mtcars3), FUN = function(dat)
dat[sample(x = row.names(dat), size = 10), ])
# NOTE(review): reshape::namerows presumably copies the row.names into an "id"
# column, which is the column joined on below -- confirm against reshape docs
mtcars_list <- lapply(X = tmp, FUN = reshape::namerows)
by_column_NA <- Join(data.list = mtcars_list, by = "id") # join by the "id" column
by_column_NA2 <- Join(data.list = mtcars_list, by = "id", rownamesAsColumn = TRUE)
identical(x = row.names(by_column_NA), # rownames from any data.frame are retained
y = Reduce(f = union, x = lapply(X = mtcars_list, FUN = row.names)))
# inserted NAs (by rownames)
# mtcars1: first four columns of mtcars.
# mtcars2: all four columns mean-centered, colnames suffixed with "_c".
# mtcars3: all four columns scaled (not centered), colnames suffixed "_s".
mtcars1 <- mtcars[1:4]
mtcars2 <- setNames(obj = as.data.frame(scale(x = mtcars1, center = TRUE, scale = FALSE)),
nm = paste0(names(mtcars1), "_c"))
mtcars3 <- setNames(obj = as.data.frame(scale(x = mtcars1, center = FALSE, scale = TRUE)),
nm = paste0(names(mtcars1), "_s"))
# Keep 10 randomly chosen rows of each data.frame so the three row sets only
# partially overlap. sample() is not seeded here, so the selected rows differ
# from run to run.
mtcars_list <- lapply(X = list(mtcars1, mtcars2, mtcars3), FUN = function(dat)
dat[sample(x = row.names(dat), size = 10), ])
by_rownm_NA <- Join(data.list = mtcars_list, by = "0") # join by row.names
by_rownm_NA2 <- Join(data.list = mtcars_list, by = "0", rownamesAsColumn = TRUE)
identical(x = row.names(by_rownm_NA), # rownames from any data.frame are retained
y = Reduce(f = union, x = lapply(X = mtcars_list, FUN = row.names)))
# types of joins
# These calls reuse `mtcars_list` from the "inserted NAs (by rownames)" example:
# three data.frames, each holding its own random subset of 10 rows.
Join(data.list = mtcars_list, by = "0", type = "left") # only rows included in mtcars1
Join(data.list = mtcars_list, by = "0", type = "right") # only rows included in mtcars3
Join(data.list = mtcars_list, by = "0", type = "inner") # only rows included in
# all 3 data.frames (might be empty due to random chance from sample() call)
# errors returned
# Each call below is expected to fail. It is wrapped in str2str::try_expr so
# the example keeps running, and the captured message is then read from the
# "error" element of the result.

# elements 2 and 3 of `data.list` are matrices, not data.frames
tmp <- str2str::try_expr(
Join(data.list = list(mtcars, as.matrix(mtcars), as.matrix(mtcars)))
)
print(tmp[["error"]]) # "The elements with the following positions in
# `data.list` are not data.frames: 2 , 3"

# `by` is the number 0 rather than the character string "0"
tmp <- str2str::try_expr(
Join(data.list = replicate(n = 3, mtcars, simplify = FALSE), by = 0)
)
print(tmp[["error"]]) # "Assertion on 'by' failed: Must be of type
# 'character' (or 'NULL'), not 'double'."

# "0" (row.names) is combined with a colname in `by`
tmp <- str2str::try_expr(
Join(data.list = replicate(n = 3, mtcars, simplify = FALSE), by = c("0","mpg"))
)
print(tmp[["error"]]) # "If '0' is a value in `by`, then it must be the
# only value and `by` must be length 1."

# the first two data.frames (attitude) have no "mpg" column
tmp <- str2str::try_expr(
Join(data.list = list(attitude, attitude, mtcars), by = "mpg")
)
print(tmp[["error"]]) # "The data.frames associated with the following positions in
# `data.list` do not contain the `by` columns: 1 , 2"