cooccurrences {polmineR} | R Documentation |
Get cooccurrence statistics.
Description
Get cooccurrence statistics.
Usage
cooccurrences(.Object, ...)
## S4 method for signature 'corpus'
cooccurrences(
.Object,
query,
cqp = is.cqp,
p_attribute = getOption("polmineR.p_attribute"),
boundary = NULL,
left = getOption("polmineR.left"),
right = getOption("polmineR.right"),
stoplist = NULL,
positivelist = NULL,
regex = FALSE,
keep = NULL,
cpos = NULL,
method = "ll",
mc = getOption("polmineR.mc"),
verbose = FALSE,
progress = FALSE,
...
)
## S4 method for signature 'character'
cooccurrences(
.Object,
query,
cqp = is.cqp,
p_attribute = getOption("polmineR.p_attribute"),
boundary = NULL,
left = getOption("polmineR.left"),
right = getOption("polmineR.right"),
stoplist = NULL,
positivelist = NULL,
regex = FALSE,
keep = NULL,
cpos = NULL,
method = "ll",
mc = getOption("polmineR.mc"),
verbose = FALSE,
progress = FALSE,
...
)
## S4 method for signature 'slice'
cooccurrences(
.Object,
query,
cqp = is.cqp,
left = getOption("polmineR.left"),
right = getOption("polmineR.right"),
p_attribute = getOption("polmineR.p_attribute"),
boundary = NULL,
stoplist = NULL,
positivelist = NULL,
keep = NULL,
method = "ll",
mc = FALSE,
progress = TRUE,
verbose = FALSE,
...
)
## S4 method for signature 'partition'
cooccurrences(
.Object,
query,
cqp = is.cqp,
left = getOption("polmineR.left"),
right = getOption("polmineR.right"),
p_attribute = getOption("polmineR.p_attribute"),
boundary = NULL,
stoplist = NULL,
positivelist = NULL,
keep = NULL,
method = "ll",
mc = FALSE,
progress = TRUE,
verbose = FALSE,
...
)
## S4 method for signature 'subcorpus'
cooccurrences(
.Object,
query,
cqp = is.cqp,
left = getOption("polmineR.left"),
right = getOption("polmineR.right"),
p_attribute = getOption("polmineR.p_attribute"),
boundary = NULL,
stoplist = NULL,
positivelist = NULL,
keep = NULL,
method = "ll",
mc = FALSE,
progress = TRUE,
verbose = FALSE,
...
)
## S4 method for signature 'context'
cooccurrences(.Object, method = "ll", verbose = FALSE)
## S4 method for signature 'partition_bundle'
cooccurrences(
.Object,
query,
verbose = FALSE,
mc = getOption("polmineR.mc"),
...
)
## S4 method for signature 'Cooccurrences'
cooccurrences(.Object, query)
## S4 method for signature 'remote_corpus'
cooccurrences(.Object, ...)
## S4 method for signature 'remote_subcorpus'
cooccurrences(.Object, ...)
Arguments
.Object |
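An object for which a cooccurrences-method is defined (see Usage), e.g. a corpus, subcorpus, partition or partition_bundle object, or a length-one character vector with the ID of a CWB corpus. |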
... |
Further parameters that will be passed into bigmatrix (applies only if big = TRUE). |
query |
A query, either a character vector to match a token, or a CQP query. |
cqp |
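Defaults to the is.cqp-function; alternatively provide TRUE/FALSE explicitly (relevant only if query is not NULL). |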
p_attribute |
The p-attribute of the tokens/the query. |
boundary |
If provided, it will be checked that the corpus positions of windows do not extend beyond the left and right boundaries of the region defined by the s-attribute where the match occurs. |
left |
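A single integer value, the number of tokens to the left of the query match to include in the context (defaults to the option polmineR.left). |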
right |
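A single integer value, the number of tokens to the right of the query match to include in the context (defaults to the option polmineR.right). |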
stoplist |
Exclude a query hit from analysis if stopword(s) is/are in
context (relevant only if query is not NULL). |
positivelist |
Character vector or numeric vector: include a query hit
only if a token in positivelist is present in the context (relevant only if query is not NULL). |
regex |
A logical value, whether stoplist and positivelist are interpreted as regular expressions. |
keep |
list with tokens to keep |
cpos |
integer vector with corpus positions, defaults to NULL - then the corpus positions for the whole corpus will be used |
method |
The statistical test(s) to use (defaults to "ll"). |
mc |
whether to use multicore |
verbose |
A logical value, whether to output progress messages. |
progress |
A logical value, whether to show a progress bar. |
Value
a cooccurrences-class object
Author(s)
Andreas Blaette
References
Baker, Paul (2006): Using Corpora in Discourse Analysis. London: continuum, p. 95-120 (ch. 5).
Manning, Christopher D.; Schuetze, Hinrich (1999): Foundations of Statistical Natural Language Processing. MIT Press: Cambridge, Mass., pp. 151-189 (ch. 5).
See Also
See the documentation for the ll-method for an explanation of the computation of the log-likelihood statistic.
Examples
use("polmineR")
use(pkg = "RcppCWB", corpus = "REUTERS")
merkel <- partition("GERMAPARLMINI", interjection = "speech", speaker = ".*Merkel", regex = TRUE)
merkel <- enrich(merkel, p_attribute = "word")
cooc <- cooccurrences(merkel, query = "Deutschland")
# use subset-method to filter results
a <- cooccurrences("REUTERS", query = "oil")
b <- subset(a, !is.na(ll))
c <- subset(b, !word %in% tm::stopwords("en"))
d <- subset(c, count_coi >= 5)
e <- subset(d, ll >= 10.83)
format(e)
# using pipe operator with subset
cooccurrences("REUTERS", query = "oil") %>%
subset(!is.na(ll)) %>%
subset(!word %in% tm::stopwords("en")) %>%
subset(count_coi >= 5) %>%
subset(ll >= 10.83) %>%
format()
# generate datatables htmlwidget with buttons for export (Excel & more)
# (alternatively use openxlsx::write.xlsx())
interactive_table <- cooccurrences("REUTERS", query = "oil") %>%
format() %>%
DT::datatable(
extensions = "Buttons",
options = list(dom = 'Btip', buttons = c("excel", "pdf", "csv"))
)
if (interactive()) show(interactive_table)
# compute cooccurrences for a set of partitions
# (example not run by default to save time on test machines)
## Not run:
pb <- partition_bundle("GERMAPARLMINI", s_attribute = "speaker")
ps <- count(pb, query = "Deutschland")[Deutschland >= 25][["partition"]]
pb_min <- pb[ps]
y <- cooccurrences(pb_min, query = "Deutschland")
if (interactive()) y[[1]]
if (interactive()) y[[2]]
y2 <- corpus("GERMAPARLMINI") %>%
subset(speaker %in% c("Hubertus Heil", "Angela Dorothea Merkel")) %>%
split(s_attribute = "speaker") %>%
cooccurrences(query = "Deutschland")
## End(Not run)