| fparse {RcppSimdJson} | R Documentation |
Fast, Friendly, and Flexible JSON Parsing
Description
Parse JSON strings and files to R objects.
Usage
fparse(
json,
query = NULL,
empty_array = NULL,
empty_object = NULL,
single_null = NULL,
parse_error_ok = FALSE,
on_parse_error = NULL,
query_error_ok = FALSE,
on_query_error = NULL,
max_simplify_lvl = c("data_frame", "matrix", "vector", "list"),
type_policy = c("anything_goes", "numbers", "strict"),
int64_policy = c("double", "string", "integer64", "always"),
always_list = FALSE
)
fload(
json,
query = NULL,
empty_array = NULL,
empty_object = NULL,
single_null = NULL,
parse_error_ok = FALSE,
on_parse_error = NULL,
query_error_ok = FALSE,
on_query_error = NULL,
max_simplify_lvl = c("data_frame", "matrix", "vector", "list"),
type_policy = c("anything_goes", "numbers", "strict"),
int64_policy = c("double", "string", "integer64", "always"),
always_list = FALSE,
verbose = FALSE,
temp_dir = tempdir(),
keep_temp_files = FALSE,
compressed_download = FALSE,
...
)
Arguments
json |
JSON strings, file paths, or raw vectors.
|
query |
If not NULL, JSON Pointer(s) used to identify and extract specific elements within json. |
empty_array |
Any R object to return for empty JSON arrays.
default: NULL |
empty_object |
Any R object to return for empty JSON objects.
default: NULL |
single_null |
Any R object to return for single JSON nulls.
default: NULL |
parse_error_ok |
Whether to allow parsing errors.
default: FALSE |
on_parse_error |
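If parse_error_ok is TRUE, on_parse_error is the R object to return for any json that cannot be parsed. default: NULL |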
query_error_ok |
Whether to allow query errors.
default: FALSE |
on_query_error |
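If query_error_ok is TRUE, on_query_error is the R object to return for any query that cannot be resolved. default: NULL |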
max_simplify_lvl |
Maximum simplification level.
|
type_policy |
Level of type strictness.
|
int64_policy |
How to return big integers to R.
|
always_list |
Whether a list should always be returned, even when json contains only a single value. default: FALSE |
verbose |
Whether to display status messages.
|
temp_dir |
Directory path to use for any temporary files.
|
keep_temp_files |
Whether to keep any temporary files created by
fload() in temp_dir. default: FALSE |
compressed_download |
Whether to request server-side compression on
the downloaded document, default: FALSE |
... |
Optional arguments which can be used, e.g., to pass additional header settings |
Details
Instead of using lapply() to parse multiple values, just use fparse() and fload() directly.
They are vectorized in order to leverage the underlying simdjson::dom::parser's ability to reuse its internal buffers between parses.
Since the overwhelming majority of JSON parsed will not result in scalars, a list() is always returned if json contains more than one value.
If json contains multiple values and has names(), the returned object will have the same names.
If json contains multiple values and is unnamed, fload() names each returned element using the file's basename().
-
query's goal is to minimize the amount of data that must be materialized as R objects (the main performance bottleneck) as well as facilitate any post-parse processing.
To maximize flexibility, there are two approaches to consider when designing query arguments.
- character vectors are interpreted as containing queries that are meant to be applied to all elements of json=.
  If json= contains 3 strings and query= contains 3 strings, the returned object will be a list of 3 elements (1 for each element of json=), which themselves each contain 3 lists (1 for each element of query=).
- lists of character vectors are interpreted as containing queries meant to be applied to json in a zip-like fashion (the first element of query= is applied to the first element of json=, the second to the second, and so on).
-
Author(s)
Brendan Knapp
Examples
# simple parsing ============================================================
# One JSON object whose "a" member holds three arrays of mixed types. The value
# 10000000000 does not fit in a 32-bit integer, which is what makes the
# int64_policy examples further down meaningful.
json_string <- '{"a":[[1,null,3.0],["a","b",true],[10000000000,2,3]]}'
fparse(json_string)
# These bytes are the ASCII encoding of the JSON string
# "raw bytes can become JSON too!" (surrounding double quotes included).
raw_json <- as.raw(
c(0x22, 0x72, 0x61, 0x77, 0x20, 0x62, 0x79, 0x74, 0x65, 0x73, 0x20, 0x63,
0x61, 0x6e, 0x20, 0x62, 0x65, 0x63, 0x6f, 0x6d, 0x65, 0x20, 0x4a, 0x53,
0x4f, 0x4e, 0x20, 0x74, 0x6f, 0x6f, 0x21, 0x22)
)
fparse(raw_json)
# ensuring a list is always returned ========================================
# Per Details, a list is always returned when json holds more than one value;
# these calls pass a single value together with always_list = TRUE.
fparse(json_string, always_list = TRUE)
fparse(c(named_single_element_character = json_string), always_list = TRUE)
# controlling type-strictness ===============================================
# Usage lists the type policies as "anything_goes", "numbers", "strict"; the
# calls below use the two policies after "anything_goes".
fparse(json_string, type_policy = "numbers")
fparse(json_string, type_policy = "strict")
# int64_policy controls how big integers are returned to R.
fparse(json_string, type_policy = "numbers", int64_policy = "string")
# Only run the "integer64" variant when the optional bit64 package is
# installed (presumably it provides the integer64 class -- confirm).
if (requireNamespace("bit64", quietly = TRUE)) {
fparse(json_string, type_policy = "numbers", int64_policy = "integer64")
}
# vectorized parsing ========================================================
# A named character vector holding two JSON documents. Per Details, when json
# contains multiple values and has names(), the result carries the same names.
json_strings <- c(
json1 = '[{"b":true,
"c":null},
{"b":[[1,2,3],
[4,5,6]],
"c":"Q"}]',
json2 = '[{"b":[[7, 8, 9],
[10,11,12]],
"c":"Q"},
{"b":[[13,14,15],
[16,17,18]],
"c":null}]'
)
fparse(json_strings)
# Raw vectors can be passed as a named list; the bytes below are the ASCII
# encodings of the JSON literals `true` and `false`.
fparse(
list(
raw_json1 = as.raw(c(0x74, 0x72, 0x75, 0x65)),
raw_json2 = as.raw(c(0x66, 0x61, 0x6c, 0x73, 0x65))
)
)
# controlling simplification ================================================
# Usage lists the levels as "data_frame", "matrix", "vector", "list"; each call
# below sets the maximum simplification level to one of those after
# "data_frame".
fparse(json_strings, max_simplify_lvl = "matrix")
fparse(json_strings, max_simplify_lvl = "vector")
fparse(json_strings, max_simplify_lvl = "list")
# customizing what `[]`, `{}`, and single `null`s return ====================
# One outer array containing an empty array, an empty object, and a null.
empties <- "[[],{},null]"
# First with the defaults (NULL for all three replacement arguments per Usage).
fparse(empties)
# Then with explicit replacements: an empty logical vector, an empty *named*
# list (built by calling `names<-` directly), and a numeric NA.
fparse(empties,
empty_array = logical(),
empty_object = `names<-`(list(), character()),
single_null = NA_real_)
# handling invalid JSON and parsing errors ==================================
# "junk JSON" is not valid JSON; parse_error_ok = TRUE allows the parse error.
fparse("junk JSON", parse_error_ok = TRUE)
# on_parse_error presumably supplies the value returned in place of input that
# cannot be parsed (NULL by default per Usage) -- confirm against the
# argument documentation.
fparse("junk JSON", parse_error_ok = TRUE,
on_parse_error = "can't parse invalid JSON")
# Valid and invalid inputs mixed in one named vector, with NA passed as
# on_parse_error.
fparse(
c(junk_JSON_1 = "junk JSON 1",
valid_JSON_1 = '"this is valid JSON"',
junk_JSON_2 = "junk JSON 2",
valid_JSON_2 = '"this is also valid JSON"'),
parse_error_ok = TRUE,
on_parse_error = NA
)
# querying JSON w/ a JSON Pointer ===========================================
# Two named documents with the same outer shape: ["a", {"b": {"c": ...}}].
# Only json2 has a "d" member under "b".
json_to_query <- c(
json1 = '[
"a",
{
"b": {
"c": [[1,2,3],
[4,5,6]]
}
}
]',
json2 = '[
"a",
{
"b": {
"c": [[7,8,9],
[10,11,12]],
"d": [1,2,3,4]
}
}
]')
# JSON Pointer array indices are zero-based, so "/1" addresses the second
# element of the outer array (the object holding "b"). Each following query
# descends one level further.
fparse(json_to_query, query = "/1")
fparse(json_to_query, query = "/1/b")
fparse(json_to_query, query = "/1/b/c")
fparse(json_to_query, query = "/1/b/c/1")
# "/1/b/c/1/0" addresses the first value of the second row of "c":
# 4 in json1 and 10 in json2.
fparse(json_to_query, query = "/1/b/c/1/0")
# handling invalid queries ==================================================
# "/1/b/d" exists only in json2, so it cannot be resolved for json1.
# on_query_error presumably supplies the stand-in value for the failed
# query -- confirm against the argument documentation.
fparse(json_to_query, query = "/1/b/d",
query_error_ok = TRUE,
on_query_error = "d isn't a key here!")
# multiple queries applied to EVERY element =================================
# A named character vector of queries: per Details, such queries are applied
# to all elements of json.
fparse(json_to_query, query = c(query1 = "/1/b/c/1/0",
query2 = "/1/b/c/1/1",
query3 = "/1/b/c/1/2"))
# multiple queries applied to EACH element ==================================
# A list of character vectors: per Details, these are applied to json in a
# zip-like fashion (first list element for json1, second for json2). The
# second set queries "d", which only json2 contains.
fparse(json_to_query,
query = list(queries_for_json1 = c(c1 = "/1/b/c/1/0",
c2 = "/1/b/c/1/1"),
queries_for_json2 = c(d1 = "/1/b/d/1",
d2 = "/1/b/d/2")))
# load JSON files ===========================================================
# system.file() resolves the path of an example file inside the installed
# RcppSimdJson package.
single_file <- system.file("jsonexamples/small/demo.json", package = "RcppSimdJson")
fload(single_file)
# An unnamed vector of several file paths: per Details, fload() names each
# returned element using the file's basename().
multiple_files <- c(
single_file,
system.file("jsonexamples/small/smalldemo.json", package = "RcppSimdJson")
)
fload(multiple_files)
## Not run:
# load remote JSON ==========================================================
# These calls download from the GitHub API, so they are wrapped in "Not run".
a_url <- "https://api.github.com/users/lemire"
fload(a_url)
# An unnamed character vector of URLs, loaded in a single vectorized call.
multiple_urls <- c(
  a_url,
  "https://api.github.com/users/eddelbuettel",
  "https://api.github.com/users/knapply",
  "https://api.github.com/users/dcooley"
)
# `query` is a JSON Pointer, and a non-empty JSON Pointer must start with "/".
# The top-level "name" member is therefore addressed as "/name", consistent
# with the "/1/b/..." queries used elsewhere in these examples.
fload(multiple_urls, query = "/name", verbose = TRUE)
# download compressed (faster) JSON =========================================
# Same request, but asking the server for a compressed response.
fload(multiple_urls, query = "/name", verbose = TRUE,
      compressed_download = TRUE)
## End(Not run)