tidy_dtm {textshape} | R Documentation |
Convert a DocumentTermMatrix/TermDocumentMatrix into Tidy Form
Description
Converts non-zero elements of a DocumentTermMatrix/TermDocumentMatrix into a tidy data set.
Usage
tidy_dtm(x, ...)
tidy_tdm(x, ...)
Arguments
x | A DocumentTermMatrix/TermDocumentMatrix object to convert. |
... | Ignored. |
Value
Returns a tidied data.frame.
Examples
# Minimal example: tidy the `simple_dtm` data set loaded via data().
data(simple_dtm)
tidy_dtm(simple_dtm)
## Not run:
# Install pacman only when it is missing. requireNamespace() checks for
# availability without attaching the package; every pacman call below is
# namespaced, so attaching is unnecessary.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load_current_gh("trinker/gofastr")
pacman::p_load(tidyverse, magrittr, ggstance)

# Build a matrix from the debate dialogue, labelling each row with
# "<time>_<tot>" (presumably used by q_dtm() as the document id -- the
# regex below parses the resulting `doc` column on that assumption).
my_dtm <- with(
  presidential_debates_2012,
  q_dtm(dialogue, paste(time, tot, sep = "_"))
)

tidy_dtm(my_dtm) %>%
  # Split the document label back into its time / turn / sentence parts.
  tidyr::extract(
    col = doc,
    into = c("time", "turn", "sentence"),
    regex = "(\\d)_(\\d+)\\.(\\d+)"
  ) %>%
  mutate(
    time = as.numeric(time),
    turn = as.numeric(turn),
    sentence = as.numeric(sentence)
  ) %>%
  # as_tibble() replaces the deprecated dplyr::tbl_df().
  as_tibble() %T>%
  # The tee pipe prints the intermediate table and passes it on unchanged.
  print() %>%
  # Total count of each term within each debate time.
  group_by(time, term) %>%
  summarize(n = sum(n)) %>%
  # Keep the ten most frequent terms per time.
  group_by(time) %>%
  arrange(desc(n)) %>%
  slice(1:10) %>%
  ungroup() %>%
  mutate(
    # Suffix the term with its time so the same term can hold a different
    # position in each facet; levels are reversed so that rows sorted first
    # (highest n) end up at the top of the y axis.
    term = factor(
      paste(term, time, sep = "__"),
      levels = rev(paste(term, time, sep = "__"))
    )
  ) %>%
  ggplot(aes(x = n, y = term)) +
  geom_barh(stat = "identity") +
  facet_wrap(~time, ncol = 2, scales = "free_y") +
  # Strip the "__<time>" suffix again for display.
  scale_y_discrete(labels = function(x) gsub("__.+$", "", x))
## End(Not run)
[Package textshape version 1.7.5 Index]