关于struct:在Google BigQuery上对聚合结果再次聚合

Aggregation of aggregation on Google Bigquery

我的数据看起来像这样

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
-- Sample data: one row per (date, article, user type, open mode, user id).
WITH test AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Step 1: collapse to one row per (date, article_id); every source row is
-- kept as a (user_type, openmode, uid) struct inside the array "ut".
date_article AS (
  SELECT
    date,
    article_id,
    ARRAY_AGG(STRUCT(user_type, openmode, uid)) AS ut
  FROM test
  GROUP BY
    date,
    article_id
)

-- Step 2: for each (date, article_id) row, re-group the elements of "ut"
-- by user_type. The result is an array "user_types" with one struct per
-- user type, whose "op" array holds that type's (openmode, uid) pairs.
SELECT
  date,
  article_id,
  ARRAY(
    SELECT AS STRUCT
      user_type,
      ARRAY_AGG(STRUCT(openmode, uid)) AS op
    FROM UNNEST(ut)
    GROUP BY user_type
  ) AS user_types
FROM date_article

我的目标是按 user_types.user_type 对 user_types.op.openmode 和 user_types.op.uid 进行聚合,
并且不产生任何重复项,例如:

enter image description here


我认为您要找的是下面这个查询:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#standardSQL
-- Builds a nested result one level at a time:
-- date -> articles -> user types -> open modes (with distinct-user counts).

-- Sample data: one row per (date, article, user type, open mode, user id).
WITH test AS (
  SELECT
    DATE,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS DATE,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Level 1: distinct user count per (date, article, user type, open mode).
users_agg AS (
  SELECT
    DATE,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY
    DATE,
    article_id,
    user_type,
    openmode
),

-- Level 2: fold the open modes of each user type into the array "modes".
modes_agg AS (
  SELECT
    DATE,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids)) AS modes
  FROM users_agg
  GROUP BY
    DATE,
    article_id,
    user_type
),

-- Level 3: fold the user types of each article into the array "types".
types_agg AS (
  SELECT
    DATE,
    article_id,
    ARRAY_AGG(STRUCT(user_type, modes)) AS types
  FROM modes_agg
  GROUP BY
    DATE,
    article_id
),

-- Level 4: fold the articles of each date into the array "articles".
article_agg AS (
  SELECT
    DATE,
    ARRAY_AGG(STRUCT(article_id, types)) AS articles
  FROM types_agg
  GROUP BY DATE
)

SELECT
  DATE,
  articles
FROM article_agg

结果为

enter image description here


您把问题想得过于复杂了。如果可能的话,请先用"普通"SQL完成聚合,然后再把结果格式化为数组/结构体。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
-- Counts distinct users per open mode with plain SQL first, then packs the
-- per-mode counts into one array per (date, article_id, user_type).
-- Array elements and output rows are explicitly ordered so that repeated
-- runs return identical results.

-- Sample data: one row per (date, article, user type, open mode, user id).
WITH test AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Plain aggregation: distinct user count per
-- (date, article_id, user_type, openmode).
agg AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY
    date,
    article_id,
    user_type,
    openmode
),

-- Formatting step: one row per (date, article_id, user_type) with the
-- per-mode counts collected in "subfields". openmode is unique within each
-- group (it is a grouping key of "agg"), so ordering by it fully determines
-- the element order.
final AS (
  SELECT
    date,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids) ORDER BY openmode) AS subfields
  FROM agg
  GROUP BY
    date,
    article_id,
    user_type
)

SELECT
  date,
  article_id,
  user_type,
  subfields
FROM final
-- The grouping key of "final" is unique per row, so this is a total order.
ORDER BY
  date,
  article_id,
  user_type