sql_hive_text_parse {creditmodel}R Documentation

Automatic production of hive SQL

Description

Returns text parse of hive SQL

Usage

sql_hive_text_parse(
  sql_dt,
  key_sql = NULL,
  key_table = NULL,
  key_id = NULL,
  key_where = c("dt = date_add(current_date(),-1)"),
  only_key = FALSE,
  left_id = NULL,
  left_where = c("dt = date_add(current_date(),-1)"),
  new_name = NULL,
  ...
)

Arguments

sql_dt

The data dictionary has three columns: table, map and feature.

key_sql

You can write your own SQL for the main table.

key_table

Key table.

key_id

Primary key id.

key_where

Key table conditions.

only_key

Only key table.

left_id

Right table's key id.

left_where

Right table conditions.

new_name

A string, Rename all variables except primary key with suffix 'new_name'.

...

Other params.

Value

Text parse of hive SQL

Examples

#sql_dt:table, map and feature
sql_dt = data.frame(table = c("table_1", "table_1",  "table_1", "table_1","table_1",
                               "table_2", "table_2","table_2",
                              "table_2","table_2","table_2","table_2",
                               "table_2","table_2","table_2","table_2",
                              "table_2","table_2","table_2","table_3","table_3",
                               "table_3","table_3","table_3"), 
                   map =  c("all","all", "all","all","all","all","all","all","all","all",
                            "all", "all","all","id_card_info",
                            "id_card_info","id_card_info", "mobile_info","mobile_info",
                            "mobile_info","all", "all","all", "all","all"), 
                   feature =c( "user_id","real_name","id_card_encode","mobile_encode","dt",
                              "user_id","type_code","first_channel",
                               "second_channel","user_name","user_sex","user_birthday",
                                 "user_age","card_province","card_zone",
                               "card_city","city","province","carrier","user_id",
                              "biz_id","biz_code","apply_time","dt"))
#sample 1
sql_hive_text_parse(sql_dt = sql_dt,
          key_sql = NULL,
               key_table = "table_2",
               key_where =  c("user_sex = 'male",
                              "user_age > 20"),
               only_key = FALSE,
               key_id = "user_id",
               left_id = "user_id",
               left_where = c("dt = date_add(current_date(),-1)",
                              "apply_time >= '2020-05-01' "
               ), new_name ="basic"
          )

#sample 2
sql_hive_text_parse(sql_dt = subset(sql_dt),
               key_sql = "SELECT 
       user_id,
       max(apply_time) as max_apply_time
       FROM table_3
       WHERE dt = date_add(current_date(),-1)
               GROUP BY user_id",
               key_id = "user_id",
               left_id = "user_id",
               left_where = c("dt = date_add(current_date(),-1)"
                              ),
               new_name =  NULL)

[Package creditmodel version 1.3.1 Index]