Groupby 구문,GroupBy 고급 기능

11287 단어 Hive
문법: groupByClause: GROUP BY groupByExpression (, groupByExpression)* groupByExpression: expression groupByQuery: SELECT expression (, expression)* FROM src groupByClause?
고급 사용: GroupBy Group By Map-Side GROUPING SETS CUBE ROLL UP Grouping_ID Grouping function
Group By 문법 groupByClause: GROUP BY groupByExpression (, groupByExpression)* groupByExpression: expression groupByQuery: SELECT expression (, expression)* FROM src groupByClause? 참고: groupByExpression에서는 컬럼을 이름으로 지정합니다. Hive 0.11.0 이상에서는 다음과 같이 설정하면 컬럼을 위치(번호)로도 지정할 수 있습니다. Hive 0.11.0 - 2.1.x에서는 hive.groupby.orderby.position.alias = true(기본값 false)로 설정하고, Hive 2.2.0 이상에서는 hive.groupby.position.alias = true(기본값 false)로 설정합니다.
간단 한 예 :
SELECT COUNT(*) FROM table2; 물론 COUNT(*) 대신 COUNT(1)을 사용할 수도 있습니다. 그룹을 나눈 뒤 각 그룹의 개수를 구하는 예는 다음과 같습니다.
INSERT OVERWRITE TABLE pv_gender_sum SELECT pv_users.gender, count (DISTINCT pv_users.userid) FROM pv_users GROUP BY pv_users.gender;
다음과 같이 여러 집계를 한 번에 수행할 수도 있습니다(count(DISTINCT)와 sum(DISTINCT)가 같은 컬럼을 대상으로 하기 때문입니다).
INSERT OVERWRITE TABLE pv_gender_agg SELECT pv_users.gender, count(DISTINCT pv_users.userid), count(*), sum(DISTINCT pv_users.userid) FROM pv_users GROUP BY pv_users.gender;
하지만 다음과 같이 한 쿼리에서 서로 다른 컬럼에 DISTINCT를 지정하는 것은 허용되지 않습니다.
INSERT OVERWRITE TABLE pv_gender_agg SELECT pv_users.gender, count(DISTINCT pv_users.userid), count(DISTINCT pv_users.ip) FROM pv_users GROUP BY pv_users.gender; groupby
group by 절을 사용할 때 select 목록에는 group by에 지정한 컬럼만 쓸 수 있습니다. 그 밖의 컬럼을 쓰려면 UDAF(집계 함수)를 사용해야 합니다(UDF는 뒤에서 따로 설명합니다). 예를 들면 다음과 같습니다:
CREATE TABLE t1(a INTEGER, b INTEGER);
SELECT a, sum(b) FROM t1 GROUP BY a;
다음 문장 은 안 됩 니 다.
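SELECT a, b FROM t1 GROUP BY a; select 절에 group by 절에 포함되지 않았고 집계 함수도 아닌 컬럼 b가 있기 때문입니다. 예를 들어 테이블 t1의 내용이 다음과 같다고 합시다:
a b
100 1
100 2
100 3
a만으로 group by 하면 a=100 그룹에 대해 b는 어떤 값을 표시해야 할까요? 후보가 여러 개이므로 hive는 이런 쿼리를 유효하지 않은 것으로 처리합니다. b를 쓰려면 UDAF(집계 함수)를 사용해야 합니다.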
고급 특성
  • GroupBy

  • 예:
    FROM pv_users INSERT OVERWRITE TABLE pv_gender_sum SELECT pv_users.gender, count(DISTINCT pv_users.userid) GROUP BY pv_users.gender INSERT OVERWRITE DIRECTORY '/user/facebook/tmp/pv_age_sum' SELECT pv_users.age, count(DISTINCT pv_users.userid) GROUP BY pv_users.age; Group By의 Map-side 집계는 hive.map.aggr=true로 설정하면 활성화됩니다. 기본값은 Hive 0.2에서는 false, Hive 0.3 이상에서는 true입니다. Map-side 집계를 사용하면 실행 효율이 높아지는 대신 메모리를 더 많이 사용합니다. set hive.map.aggr=true; SELECT COUNT(*) FROM table2;
    이 문서는 주로 GROUP BY 절의 고급 집계 기능을 소개합니다.
  • Grouping Sets, Cubes, Rollups, and the GROUPING__ID Function ,Cube, ,RollUp
  • GROUPING SETS GROUPING SETS GROUP BY , GROUP BY , group by union all , ,
    hive -e “use acorn_3g;desc test_xinyan_reg;” user_id bigint None device_id int None 휴대폰,태 블 릿 OSid int None 운영 체제 유형 appid int None 모 바 일 앱id client_version string 없 음 클 라 이언 트 버 전 fromid int None 4 급 채널 몇 개의 데모 가 여러분 의 이 해 를 돕 습 니 다.
    	### `		grouping sets  	  hive  `
    

    select device_id,os_id,app_id,count(user_id) from test_xinyan_reg group by device_id,os_id,app_id grouping sets((device_id)) SELECT device_id,null,null,count(user_id) FROM test_xinyan_reg group by device_id select device_id,os_id,app_id,count(user_id) from test_xinyan_reg group by device_id,os_id,app_id grouping sets((device_id,os_id),(os_id,app_id)) SELECT device_id,os_id,null,count(user_id) FROM test_xinyan_reg group by device_id,os_id
  • acorn_3g.test_xinyan_reg :

  • select null,os_id,app_id,count(user_id) from test_xinyan_reg group by os_id,app_id; select device_id,os_id,app_id,count(user_id) from test_xinyan_reg group by device_id,os_id,app_id grouping sets((device_id,os_id),(device_id)) SELECT device_id,os_id,null,count(user_id) FROM test_xinyan_reg group by device_id,os_id
  • Union all

  • SELECT device_id,null,null,count(user_id) FROM test_xinyan_reg group by device_id select device_id,os_id,app_id,count(user_id) from test_xinyan_reg group by device_id,os_id,app_id grouping sets((device_id),(os_id),(device_id,os_id),()) SELECT device_id,null,null,count(user_id) FROM test_xinyan_reg group by device_id UNION ALL SELECT null,os_id,null,count(user_id) FROM test_xinyan_reg group by os_id UNION ALL SELECT device_id,os_id,null,count(user_id) FROM test_xinyan_reg group by device_id,os_id UNION ALL SELECT null,null,null,count(user_id) FROM test_xinyan_reg
  • UNION ALL
  • CUBE
    SELECT device_id,null,null,null,null ,count(user_id) FROM test_xinyan_reg group by device_id UNION ALL SELECT null,os_id,null,null,null ,count(user_id) FROM test_xinyan_reg group by os_id UNION ALL SELECT device_id,os_id,null,null,null ,count(user_id) FROM test_xinyan_reg group by device_id,os_id UNION ALL SELECT null,null,app_id,null,null ,count(user_id) FROM test_xinyan_reg group by app_id UNION ALL SELECT device_id,null,app_id,null,null ,count(user_id) FROM test_xinyan_reg group by device_id,app_id UNION ALL SELECT null,os_id,app_id,null,null ,count(user_id) FROM test_xinyan_reg group by os_id,app_id UNION ALL SELECT device_id,os_id,app_id,null,null ,count(user_id) FROM test_xinyan_reg group by device_id,os_id,app_id UNION ALL SELECT null,null,null,client_version,null ,count(user_id) FROM test_xinyan_reg group by client_version UNION ALL SELECT device_id,null,null,client_version,null ,count(user_id) FROM test_xinyan_reg group by device_id,client_version UNION ALL SELECT null,os_id,null,client_version,null ,count(user_id) FROM test_xinyan_reg group by os_id,client_version UNION ALL SELECT device_id,os_id,null,client_version,null ,count(user_id) FROM test_xinyan_reg group by device_id,os_id,client_version UNION ALL SELECT null,null,app_id,client_version,null ,count(user_id) FROM test_xinyan_reg group by app_id,client_version UNION ALL SELECT device_id,null,app_id,client_version,null ,count(user_id) FROM test_xinyan_reg group by device_id,app_id,client_version UNION ALL SELECT null,os_id,app_id,client_version,null ,count(user_id) FROM test_xinyan_reg group by os_id,app_id,client_version UNION ALL SELECT device_id,os_id,app_id,client_version,null ,count(user_id) FROM test_xinyan_reg group by device_id,os_id,app_id,client_version UNION ALL SELECT null,null,null,null,from_id ,count(user_id) FROM test_xinyan_reg group by from_id UNION ALL SELECT device_id,null,null,null,from_id ,count(user_id) FROM test_xinyan_reg group by device_id,from_id UNION ALL SELECT 
null,os_id,null,null,from_id ,count(user_id) FROM test_xinyan_reg group by os_id,from_id UNION ALL SELECT device_id,os_id,null,null,from_id ,count(user_id) FROM test_xinyan_reg group by device_id,os_id,from_id UNION ALL SELECT null,null,app_id,null,from_id ,count(user_id) FROM test_xinyan_reg group by app_id,from_id UNION ALL SELECT device_id,null,app_id,null,from_id ,count(user_id) FROM test_xinyan_reg group by device_id,app_id,from_id UNION ALL SELECT null,os_id,app_id,null,from_id ,count(user_id) FROM test_xinyan_reg group by os_id,app_id,from_id UNION ALL SELECT device_id,os_id,app_id,null,from_id ,count(user_id) FROM test_xinyan_reg group by device_id,os_id,app_id,from_id UNION ALL SELECT null,null,null,client_version,from_id ,count(user_id) FROM test_xinyan_reg group by client_version,from_id UNION ALL SELECT device_id,null,null,client_version,from_id ,count(user_id) FROM test_xinyan_reg group by device_id,client_version,from_id UNION ALL SELECT null,os_id,null,client_version,from_id ,count(user_id) FROM test_xinyan_reg group by os_id,client_version,from_id UNION ALL SELECT device_id,os_id,null,client_version,from_id ,count(user_id) FROM test_xinyan_reg group by device_id,os_id,client_version,from_id UNION ALL SELECT null,null,app_id,client_version,from_id ,count(user_id) FROM test_xinyan_reg group by app_id,client_version,from_id UNION ALL SELECT device_id,null,app_id,client_version,from_id ,count(user_id) FROM test_xinyan_reg group by device_id,app_id,client_version,from_id UNION ALL SELECT null,os_id,app_id,client_version,from_id ,count(user_id) FROM test_xinyan_reg group by os_id,app_id,client_version,from_id UNION ALL SELECT device_id,os_id,app_id,client_version,from_id ,count(user_id) FROM test_xinyan_reg group by device_id,os_id,app_id,client_version,from_id UNION ALL
    SELECT null,null,null,null,null ,count(user_id) FROM test_xinyan_reg; 보기만 해도 머리가 아프죠? cube의 강력함이 느껴지시나요? (낮은 버전의 hive에서는 UNION ALL 방식으로 해결할 수 있지만, 어쩔 수 없을 때 쓰는 방법이라고 할 수 있습니다.)
  • cube는 지정한 컬럼들의 모든 조합으로 집계합니다. hive에서 cube(a,b,c)는 먼저 (a,b,c)로 group by 하고, 이어서 (a,b),(a,c),(a),(b,c),(b),(c)로, 마지막으로 테이블 전체에 대해 group by 합니다. 예: select device_id,os_id,app_id,client_version,from_id,count(user_id) from test_xinyan_reg group by device_id,os_id,app_id,client_version,from_id with cube; 이 hql은 위에 나열한 (길고 복잡한) UNION ALL 쿼리와 동등합니다.
  • ROLL UP
    select device_id,os_id,app_id,client_version,from_id,count(user_id) from test_xinyan_reg group by device_id,os_id,app_id,client_version,from_id with rollup; rollup은 group by 컬럼을 오른쪽부터 하나씩 줄여 가며 계층적으로 집계합니다. 위 쿼리는 다음 grouping sets 쿼리와 동등합니다.
    select device_id,os_id,app_id,client_version,from_id,count(user_id) from test_xinyan_reg group by device_id,os_id,app_id,client_version,from_id grouping sets ((device_id,os_id,app_id,client_version,from_id),(device_id,os_id,app_id,client_version),(device_id,os_id,app_id),(device_id,os_id),(device_id),());
  • sql :
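  • Grouping__ID: 집계 결과에서 어떤 컬럼이 집계 대상에서 빠지면 그 값은 null로 표시되는데, 컬럼의 원래 값이 null인 경우와 구분되지 않습니다. (이를 구분하기 위해 grouping_id를 사용합니다.) grouping_id는 각 행이 group by의 어떤 컬럼 조합으로 집계되었는지를 나타내며, group by에 지정한 컬럼을 기준으로 계산됩니다. 예를 들어 group by A,B이면 grouping_id(A,B)에 해당합니다. grouping_id()는 grouping()으로 표현할 수 있는데, grouping_id(A, B)는 grouping(A) + grouping(B)와 같습니다. 여기서 +는 덧셈이 아니라 비트를 이어 붙인다는 뜻으로, grouping(A)=1, grouping(B)=1이면 grouping_id(A, B)=11B, 즉 십진수 3입니다. 다음 sql 예제를 보면 더 쉽게 이해할 수 있습니다.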
    Column1 (key) | Column2 (value)
    1 | NULL
    1 | 1
    2 | 2
    3 | 3
    3 | NULL
    4 | 5
    >  SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
    
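    hql 실행 결과:
    key | value | Grouping__ID | Grouping__ID(2진수) | count
    NULL | NULL | 0 | 00 | 6
    1 | NULL | 1 | 10 | 2
    1 | NULL | 3 | 11 | 1
    1 | 1 | 3 | 11 | 1
    2 | NULL | 1 | 10 | 1
    2 | 2 | 3 | 11 | 1
    3 | NULL | 1 | 10 | 2
    3 | NULL | 3 | 11 | 1
    3 | 3 | 3 | 11 | 1
    4 | NULL | 1 | 10 | 1
    4 | 5 | 3 | 11 | 1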
    	GROUPING__ID      ,         null,         null。(   DataFilterNull.py   ,         null、“”    ),
    
  • Grouping function
    SELECT key, value, GROUPING__ID, grouping(key, value), grouping(value, key), grouping(key), grouping(value), count(*) FROM T1 GROUP BY key, value WITH ROLLUP; grouping NULL , NULL 2 , NULL, rollup、cube、grouping sets NULL 。 ,grouping(NULL) 0; ,grouping(NULL) 1。 , null grouping 1, ROLLUP-NULL 。
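    This query will produce the following results.
    key | value | GROUPING__ID | grouping(key, value) | grouping(value, key) | grouping(key) | grouping(value) | count(*)
    NULL | NULL | 3 | 3 | 3 | 1 | 1 | 6
    1 | NULL | 0 | 0 | 0 | 0 | 0 | 2
    1 | NULL | 1 | 1 | 2 | 0 | 1 | 1
    1 | 1 | 0 | 0 | 0 | 0 | 0 | 1
    2 | NULL | 1 | 1 | 2 | 0 | 1 | 1
    2 | 2 | 0 | 0 | 0 | 0 | 0 | 1
    3 | NULL | 0 | 0 | 0 | 0 | 0 | 2
    3 | NULL | 1 | 1 | 2 | 0 | 1 | 1
    3 | 3 | 0 | 0 | 0 | 0 | 0 | 1
    4 | NULL | 1 | 1 | 2 | 0 | 1 | 1
    4 | 5 | 0 | 0 | 0 | 0 | 0 | 1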

    좋은 웹페이지 즐겨찾기