Aggregation of aggregation on Google BigQuery
我的数据看起来像这样
-- Inline sample rows exposing date, article_id, user_type, openmode and uid.
WITH test AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- First aggregation level: pack every row of a (date, article_id) pair
-- into a single array of structs so it can be re-grouped afterwards.
date_article AS (
  SELECT
    date,
    article_id,
    ARRAY_AGG(STRUCT(user_type, openmode, uid)) AS packed_rows
  FROM test
  GROUP BY
    date,
    article_id
)

SELECT
  date,
  article_id,
  -- Second level: unpack the array and re-group its elements by user_type.
  -- Each user_type gets an `op` array with one (openmode, uid) entry per
  -- source row.
  ARRAY(
    SELECT AS STRUCT
      user_type AS user_type,
      ARRAY_AGG(STRUCT(openmode AS openmode, uid AS uid)) AS op
    FROM UNNEST(packed_rows)
    GROUP BY user_type
  ) AS user_types
FROM date_article
我的目标是对 user_types.op.openmode 和 user_types.op.uid 进行聚合,并且使 user_types.user_type 不产生任何重复项,例如:
我认为您要找的是下面这个查询:
#standardSQL
-- Inline sample rows. Note that DATE is used here as a plain column alias.
WITH test AS (
  SELECT
    DATE,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS DATE,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Level 1: distinct uid count per (DATE, article_id, user_type, openmode).
users_agg AS (
  SELECT
    DATE,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY
    DATE,
    article_id,
    user_type,
    openmode
),

-- Level 2: fold the per-openmode counts into one array per user_type.
modes_agg AS (
  SELECT
    DATE,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids)) AS modes
  FROM users_agg
  GROUP BY
    DATE,
    article_id,
    user_type
),

-- Level 3: fold the user types (with their modes) into one array per article.
types_agg AS (
  SELECT
    DATE,
    article_id,
    ARRAY_AGG(STRUCT(user_type, modes)) AS types
  FROM modes_agg
  GROUP BY
    DATE,
    article_id
),

-- Level 4: fold the articles (with their types) into one array per DATE.
article_agg AS (
  SELECT
    DATE,
    ARRAY_AGG(STRUCT(article_id, types)) AS articles
  FROM types_agg
  GROUP BY DATE
)

SELECT
  DATE,
  articles
FROM article_agg
结果为
您使它变得比必要的更为复杂。如果可能的话,请先执行"普通" SQL,然后再格式化为数组/结构。
-- Inline sample rows exposing date, article_id, user_type, openmode and uid.
WITH test AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Plain SQL first: distinct uid count per
-- (date, article_id, user_type, openmode).
agg AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY
    date,
    article_id,
    user_type,
    openmode
),

-- Then shape the result: one row per (date, article_id, user_type), with the
-- per-openmode counts collected into an array of structs.
final AS (
  SELECT
    date,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids)) AS subfields
  FROM agg
  GROUP BY
    date,
    article_id,
    user_type
)

SELECT
  date,
  article_id,
  user_type,
  subfields
FROM final