run_pipeline_gbq {condusco} | R Documentation |
A wrapper for running pipelines with a BigQuery invocation query
Description
A wrapper for running pipelines with a BigQuery invocation query
Usage
run_pipeline_gbq(pipeline, query, project, ...)
Arguments
pipeline |
User-provided function that takes a single argument: one row of the query results |
query |
A query to execute in Google BigQuery |
project |
The Google BigQuery project to bill |
... |
Additional arguments passed to bigrquery's query_exec(), e.g. use_legacy_sql = FALSE |
Examples
## Not run:
# query_exec() comes from bigrquery and whisker.render() from whisker;
# both must be attached for the calls below to resolve.
library(bigrquery)
library(whisker)

# Set the GBQ project to bill (replace the empty string with your project ID)
project <- ''

# Set the following options for GBQ authentication on a cloud instance
options("httr_oauth_cache" = "~/.httr-oauth")
options(httr_oob_default = TRUE)

# Run the query below to authenticate and write credentials to the
# .httr-oauth file
query_exec("SELECT 'foo' as bar", project = project)
# Pipeline step: takes one row of the invocation query's results (`params`),
# fills the mustache SQL template below with it, runs the rendered query in
# BigQuery using standard SQL, and prints the result.
# `project` is not an argument; it is looked up in the enclosing environment.
pipeline <- function(params) {
  # The {{#list}} ... {{/list}} section is repeated for each element of the
  # `list` field, producing one SUM(...) column per author.
  sql_template <- "
SELECT
{{#list}}
SUM(CASE WHEN author.name ='{{name}}' THEN 1 ELSE 0 END) as n_{{name_clean}},
{{/list}}
repo_name
FROM `bigquery-public-data.github_repos.sample_commits`
GROUP BY repo_name
;"
  rendered_sql <- whisker.render(sql_template, params)
  result <- query_exec(rendered_sql, project = project, use_legacy_sql = FALSE)
  print(result)
}
# Invocation query: picks the 10 authors with the most commits and aggregates
# them into a single JSON-array string column named `list`, where each element
# has the keys `name` and `name_clean` (name with non-alphabetic characters
# removed). The resulting row(s) are handed to `pipeline`.
run_pipeline_gbq(
  pipeline = pipeline,
  query = "
SELECT CONCAT('[',
STRING_AGG(
CONCAT('{\"name\":\"',name,'\",'
,'\"name_clean\":\"', REGEXP_REPLACE(name, r'[^[:alpha:]]', ''),'\"}'
)
),
']') as list
FROM (
SELECT author.name,
COUNT(commit) n_commits
FROM `bigquery-public-data.github_repos.sample_commits`
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10
)
",
  project = project,
  use_legacy_sql = FALSE
)
## End(Not run)
[Package condusco version 0.1.0 Index]