word_position {qdap} | R Documentation |
Word Position
Description
Find counts of the positioning of words within a sentence.
Usage
word_position(
text.var,
match.terms,
digits = 2,
percent = TRUE,
zero.replace = 0,
...
)
Arguments
text.var |
The text variable. |
match.terms |
A character vector of quoted terms to find the positions of. |
digits |
Integer; number of decimal places to round when printing. |
percent |
logical. If |
zero.replace |
Value to replace 0 values with. |
... |
Currently ignored. |
Value
Returns a list, of class "word_position", of data frames and information regarding word positions:
raw |
raw word position counts in long format (may be more useful for plotting) |
count |
integer word position counts |
prop |
proportional word position counts; proportional to each total word uses |
rnp |
a character combination data frame of count and proportional |
zero_replace |
value to replace zeros with; mostly internal use |
percent |
The value of percent used for plotting purposes. |
digits |
integer value of number of digits to display; mostly internal use |
Note
Default printing is a heatmap plot.
Examples
## Not run:
position <- with(DATA, word_position(sent_detect(state), Top25Words))
position
lview(position)
plot(position)
scores(position)
preprocessed(position)
counts(position)
proportions(position)
plot(proportions(position))
stopwords <- unique(c(contractions[[1]], Top200Words))
topwords <- freq_terms(pres_debates2012[["dialogue"]], top = 40,
at.least = 4, stopwords = stopwords)[[1]]
word_position(pres_debates2012[["dialogue"]], topwords)
plot(word_position(pres_debates2012[["dialogue"]], topwords), FALSE)
plot(word_position(pres_debates2012[["dialogue"]], topwords), TRUE, scale=FALSE)
wordlist <- c("tax", "health", "rich", "america", "truth", "money", "cost",
"governnor", "president", "we", "job", "i", "you", "because",
"our", "years")
word_position(pres_debates2012[["dialogue"]], wordlist)
## BY VARIABLES
library(gridExtra)
pres_deb_by_time <- with(pres_debates2012, split(dialogue, time))
out1 <-lapply(pres_deb_by_time, word_position, wordlist)
do.call("grid.arrange", c(lapply(out1, plot), ncol=1))
pres_deb_by_person <- with(pres_debates2012, split(dialogue, person))
out2 <-lapply(pres_deb_by_person, word_position, wordlist)
plots <- lapply(names(out2), function(x) plot(out2[[x]], scale=FALSE) +
ggtitle(x))
do.call("grid.arrange", c(plots, ncol=2))
## As a histogram
## theme taken from: http://jonlefcheck.net/2013/03/11/black-theme-for-ggplot2-2/
theme_black <- function(base_size=12,base_family="") {
theme_grey(base_size=base_size,base_family=base_family) %+replace%
theme(
# Specify axis options
axis.line=element_blank(),
axis.text.x=element_text(size=base_size*0.8,color="grey55",
lineheight=0.9,vjust=1),
axis.text.y=element_text(size=base_size*0.8,color="grey55",
lineheight=0.9,hjust=1),
axis.ticks=element_line(color="grey55",size = 0.2),
axis.title.x=element_text(size=base_size,color="grey55",vjust=1),
axis.title.y=element_text(size=base_size,color="grey55",angle=90,
vjust=0.5),
axis.ticks.length=unit(0.3,"lines"),
axis.ticks.margin=unit(0.5,"lines"),
# Specify legend options
legend.background=element_rect(color=NA,fill="black"),
legend.key=element_rect(color="grey55", fill="black"),
legend.key.size=unit(1.2,"lines"),
legend.key.height=NULL,
legend.key.width=NULL,
legend.text=element_text(size=base_size*0.8,color="grey55"),
legend.title=element_text(size=base_size*0.8,face="bold",hjust=0,
color="grey55"),
legend.position="right",
legend.text.align=NULL,
legend.title.align=NULL,
legend.direction="vertical",
legend.box=NULL,
# Specify panel options
panel.background=element_rect(fill="black",color = NA),
panel.border=element_rect(fill=NA,color="grey55"),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.spacing=unit(0.25,"lines"),
# Specify facetting options
strip.background=element_rect(fill="grey30",color="grey10"),
strip.text.x=element_text(size=base_size*0.8,color="grey55"),
strip.text.y=element_text(size=base_size*0.8,color="grey55",
angle=-90),
# Specify plot options
plot.background=element_rect(color="black",fill="black"),
plot.title=element_text(size=base_size*1.2,color="grey55"),
plot.margin=unit(c(1,1,0.5,0.5),"lines")
)
}
out3 <- list_df2df(lapply(out2[1:2], preprocessed), "Person")
out3 %>% ggplot(aes(x=position)) +
geom_histogram(binwidth = 1, fill="white") +
facet_grid(Person~word) +
theme_black() + ylab("Count") + xlab("Position")
## MOVE TO THE MICRO THROUGH QUALITATIVE ANALYSIS
locs <- unlist(setNames(lapply(wordlist, function(x){
sapply(c("ROMNEY", "OBAMA"), function(y){
which(pres_debates2012[["person"]] ==y & grepl(x, pres_debates2012[["dialogue"]]))
})
}), wordlist), recursive=FALSE)
fdl <- qdap:::folder(pres_context)
Map(function(x, y){
if (identical(integer(0), x)) return(NULL)
z <- with(pres_debates2012, trans_context(dialogue, person, inds=x, n.before=1))
z[["text"]] <- gsub(beg2char(y, "."),
paste0("[[", beg2char(y, "."), "]]"), z[["text"]])
print(z, file=file.path(fdl, sprintf("%s.doc", y)))
}, locs, names(locs))
## End(Not run)