데.분.레 - 복습 12

연령별 구분 집계하기


mst_users 테이블이다.

WITH mst_users_with_int_birth_date AS (
  SELECT *
       , 20170101 AS int_specific_date
       , CAST(REPLACE(SUBSTRING(birth_date, 1, 10), '-', '') AS integer) AS int_birth_date
    FROM mst_users
  )
, mst_users_with_age AS (
  SELECT *
       , FLOOR((int_specific_date-int_birth_date)/10000) AS age
    FROM mst_users_with_int_birth_date
    )
SELECT user_id
     , sex
     , birth_date
     , age
  FROM mst_users_with_age


REPLACE : 바꾸려는 문자열과 바꾸려는 문자를 입력하고, 어떤 문자로 바꿀지를 입력하면 된다.
CAST : 데이터 형식을 다른 데이터 형식으로 바꿔주는 함수이다.
이 두가지 함수를 사용하여 특정 날짜에 사용자의 나이를 구할 수 있다

WITH 구문을 사용하여 연령별 구분을 추가할 수 있다.

WITH mst_users_with_int_birth_date AS (
  SELECT *
       , 20170101 AS int_specific_date
       , CAST(REPLACE(SUBSTRING(birth_date, 1, 10), '-', '') AS integer) AS int_birth_date
    FROM mst_users
  )
, mst_users_with_age AS (
  SELECT *
       , FLOOR((int_specific_date-int_birth_date)/10000) AS age
    FROM mst_users_with_int_birth_date
    )
, mst_users_with_category AS (
  SELECT user_id
       , sex
       , age
       , CONCAT(CASE WHEN 20<=age THEN sex
                ELSE '' END
       , CASE WHEN age BETWEEN 4 AND 12 THEN 'C'
              WHEN age BETWEEN 13 AND 19 THEN 'T'
              WHEN age BETWEEN 20 AND 34 THEN '1'
              WHEN age BETWEEN 35 AND 49 THEN '2'
              WHEN age >= 50 THEN '3'
              END) AS category
     FROM mst_users_with_age)

SELECT *
  FROM mst_users_with_category


CONCAT : 문자열을 합쳐주는 역할을 한다.

연령별 구분의 특징 추출하기

이번에는 action_log 테이블과 JOIN하여 각각 구매한 상품의 카테고리를 집계해보자.

WITH mst_users_with_int_birth_date AS (
  SELECT *
       , 20170101 AS int_specific_date
       , CAST(REPLACE(SUBSTRING(birth_date, 1, 10), '-', '') AS integer) AS int_birth_date
    FROM mst_users
  )
, mst_users_with_age AS (
  SELECT *
       , FLOOR((int_specific_date-int_birth_date)/10000) AS age
    FROM mst_users_with_int_birth_date
    )
, mst_users_with_category AS (
  SELECT user_id
       , sex
       , age
       , CONCAT(CASE WHEN 20<=age THEN sex
                ELSE '' END
       , CASE WHEN age BETWEEN 4 AND 12 THEN 'C'
              WHEN age BETWEEN 13 AND 19 THEN 'T'
              WHEN age BETWEEN 20 AND 34 THEN '1'
              WHEN age BETWEEN 35 AND 49 THEN '2'
              WHEN age >= 50 THEN '3'
              END) AS category
     FROM mst_users_with_age)

SELECT p.category AS product_category
     , u.category AS user_category
     , COUNT(*) AS purchase_count
  FROM action_log AS p
  JOIN mst_users_with_category AS u
  ON p.user_id = u.user_id
  WHERE action = 'purchase'
  GROUP BY p.category, u.category
  ORDER BY p.category, u.category

좋은 웹페이지 즐겨찾기