R: Convert data to long or to wide form

to_long {maditr}

R Documentation

Convert data to long or to wide form

Description

to_long increases number of rows in the dataset and reduce number of columns. to_wide makes invert transformation. You can use cols for selecting variables in the arguments. See examples.

Usage

to_long(
  data,
  columns = NULL,
  keep = NULL,
  names_in = "variable",
  values_in = "value",
  drop_na = FALSE,
  names_factor = TRUE,
  value_factor = FALSE,
  ...
)

to_wide(
  data,
  keep = NULL,
  names_in = variable,
  values_in = value,
  fun = identity,
  sep = "_",
  fill = NA,
  missing_comb = c("none", "rows", "columns", "all"),
  ...
)

Arguments

`data`	A data.frame to convert
`columns`	unquoted names of variables for stacking. When missing, we will stack all columns outside `keep` columns.
`keep`	unquoted names of columns which will be kept as is, e. g. only recycled or deduplicated. If missing, it is all columns except stacked or unstacked. If `FALSE` then nothing will be kept.
`names_in`	name of the stacked variable names column. The default name is 'variable'. It is quoted in the `to_long` and unquoted in `to_wide`. If `FALSE` in the `to_wide` than nothing will be widening.
`values_in`	name(-s) of the stacked data values column(s). The default name is 'value'. Multiple names can be provided here for the case when `columns` is a list, though note well that the names provided in `columns` take precedence. It is quoted in the `to_long` and unqoted in `to_wide`
`drop_na`	If TRUE, NA values will be removed from the stacked data.
`names_factor`	If TRUE, the column with names will be converted to factor, else it will be a character column. TRUE by default.
`value_factor`	If TRUE, the value column will be converted to factor, else the stacked values type is left unchanged. FALSE by default.
`...`	other arguments passed to `data.table::melt`/`data.table::dcast`
`fun`	Should the data be aggregated before casting? By default, it is `identity` - no aggregation. To use multiple aggregation functions, pass a list; see Examples.
`sep`	Character vector of length 1, indicating the separating character in variable names generated during casting. Default is "_".
`fill`	Value with which to fill missing cells. `NA` by default. If `fun` is present, takes the value by applying the function on a 0-length vector.
`missing_comb`	One of "none" (the default), "rows" - include missing combinations in rows, "columns" - include missing combinations in columns, and "all" include all missing combinations.

Value

data.table in the wide or long form.

Examples

data(iris)

# 'to_long'

long_iris = iris %>%
    to_long(keep = Species)

long_iris

iris_with_stat = long_iris %>%
    take(mean = mean(value),
         sd = sd(value),
         n = .N*1.0,
         by = .(Species, variable)
    ) %>%
    to_long(columns = c(mean, sd, n), names_in = "stat")

# 'to_wide' - table with multiple stats
iris_with_stat %>%
    to_wide()


iris_with_stat %>%
    to_wide(names_in = c(variable, stat))

iris_with_stat %>%
    to_wide(names_in = c(variable, Species))

# 'to_wide' - aggregation function
long_iris %>%
    to_wide(fun = list(Mean = mean, SD = sd, N = length))

# multiple variables
iris %>%
    to_long(list(Sepal = cols("^Sepal"), Petal = cols("^Petal"))) %>%
    let(
        variable = factor(variable, levels = 1:2, labels = c("Length", "Width"))
    ) %>%
    to_wide(values_in = c(Sepal, Petal))

# '%to%' selector - example from tidyr::pivot_longer

data(anscombe)
anscombe %>%
    to_long(
        list(x = x1 %to% x4, y = y1 %to% y4),
        names_in = "set"
    )

######################################
## Examples from data.table melt/dcast
######################################

set.seed(45)
DT = data.table(
    i_1 = c(1:5, NA)*1.0,
    i_2 = c(NA,6,7,8,9,10)*1.0,
    f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)),
    f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE),
    c_1 = sample(c(letters[1:3], NA), 6, TRUE),
    d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"),
    d_2 = as.Date(6:1, origin="2012-01-01")
)

# id, values as character/integer/numeric vectors

to_long(DT, f_1, keep = 1:2)
to_long(DT, f_1, keep = c(i_1, i_2))
to_long(DT, f_1, keep = i_1 %to% i_2)
to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE)
to_long(DT, f_1, keep = cols("i_{1:2}"))
to_long(DT, f_1, keep = cols("^i_"))
to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val")

col_var = "^i_"
to_long(DT, 3, keep = cols(col_var))

to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE)

to_long(mtcars)
to_long(mtcars, keep = am)
to_long(mtcars, columns = c(am, vs, mpg))
to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE)
to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE)
to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE)

data("ChickWeight")
names(ChickWeight) = tolower(names(ChickWeight))
DT = to_long(ChickWeight, keep=2:4)

to_wide(DT, keep = time, fun = mean)
to_wide(DT, keep = FALSE, fun = mean)
to_wide(DT, keep = diet, fun = mean)
to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all")
to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0)
to_wide(DT, chick, time, fun = mean)



# using FALSE
DT = data.table(v1 = rep(1:2, each = 6),
                v2 = rep(rep(1:3, 2), each = 2),
                v3 = rep(1:2, 6),
                v4 = rnorm(6))

## for each combination of (v1, v2), add up all values of v4
to_wide(DT,
        cols("^v(1|2)"),
        names_in = FALSE,
        values_in = v4,
        fun = sum
)

# multiple values_in and multiple fun
DT = data.table(x=sample(5,20,TRUE),
                y=sample(2,20,TRUE),
                z=sample(letters[1:2], 20,TRUE),
                d1 = runif(20),
                d2=1L)

# multiple values_in
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = c(d1, d2),
        fun = sum,
        fill = 0)

# multiple funs
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = d1,
        fun = list(sum = sum, mean = mean),
        fill = NULL)

# multiple fun and values_in (all combinations)
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = c(d1, d2),
        fun = list(sum = sum, mean = mean)
)

# multiple fun and values_in (one-to-one)
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = list(d1, d2),
        fun = list(sum = sum, mean = mean)
)

[Package maditr version 0.8.4 Index]