- Weekly Retention을 구하는 쿼리를 바닥부터 스스로 작성해보세요
-- ARRAY 풀기 / 시간 데이터 변환
WITH base AS (
SELECT
DISTINCT
user_id,
user_pseudo_id,
event_name,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week,
event_param.key AS event_key,
event_param.value.string_value AS string_value,
event_param.value.int_value AS int_value,
platform
FROM advanced.app_logs AS a
CROSS JOIN UNNEST(event_params) AS event_param
WHERE event_date BETWEEN '2022-08-01' AND '2023-01-08'
), diff_week AS (
SELECT
DISTINCT user_pseudo_id,
DATE_DIFF(event_week, first_week, WEEK) AS diff_of_week
FROM (
SELECT
*,
MIN(event_week) OVER(PARTITION BY user_pseudo_id ORDER BY event_week) AS first_week
FROM base
)
)
-- retention
SELECT
diff_of_week,
ROUND(SAFE_DIVIDE(cnt, first_week_cnt),3) AS retention_ratio
FROM (
SELECT
*,
FIRST_VALUE(cnt) OVER(ORDER BY diff_of_week) AS first_week_cnt
FROM (
SELECT
diff_of_week,
COUNT(DISTINCT user_pseudo_id) cnt
FROM diff_week
GROUP BY diff_of_week
)
)
order by diff_of_week
- Retain User를 New + Current + Resurrected + Dormant User로 나누는 쿼리를 작성해보세요
--조건 설정
--1) 설정 기간이 '2022-08-01' ~ '2023-01-08'까지고, '2023-01-09' 추출한다고 가정
--2) Retention Curve에서 10주차 이후에 0.06 수준으로 떨어져 10주 이상 미사용 시 이탈로 설정
--New User : 추출일 기준 최근 4주 이내 첫 사용자
--Current User : 나머지 사용자
--Resurrected User : 10주 이상 미사용 후 다시 사용
--Dormant User : 추출일 기준 10주 이전에 마지막 사용자
-- 첫 사용주(first_week) / 마지막 사용주(last_week) 구하기
, first_last_week AS (
SELECT
DISTINCT user_pseudo_id,
MIN(event_week) OVER(PARTITION BY user_pseudo_id) AS first_week,
MAX(event_week) OVER(PARTITION BY user_pseudo_id) AS last_week,
DATE_TRUNC('2023-01-09', WEEK(MONDAY)) AS extract_date
FROM base
-- 직전 사용주와의 차이 구하기
), diff_of_event_week AS (
SELECT
user_pseudo_id,
DATE_DIFF(event_week, lag_week, WEEK) diff_of_lag_week
FROM (
SELECT
DISTINCT user_pseudo_id,
event_week,
LAG(event_week) OVER(PARTITION BY user_pseudo_id ORDER BY event_week) AS lag_week
FROM base
)
), retain_users AS (
SELECT
a.user_pseudo_id,
first_week,
last_week,
max_diff_lag_week,
extract_date
FROM first_last_week AS a
INNER JOIN (
SELECT
DISTINCT user_pseudo_id,
MAX(diff_of_lag_week) OVER(PARTITION BY user_pseudo_id) AS max_diff_lag_week
FROM diff_of_event_week
) AS b ON a.user_pseudo_id = b.user_pseudo_id
)
-- 유저 타입별 count
SELECT
user_type,
COUNT(DISTINCT user_pseudo_id) cnt
FROM (
SELECT
user_pseudo_id,
CASE WHEN DATE_DIFF(extract_date, first_week, WEEK) <= 4 THEN 'New_User'
WHEN DATE_DIFF(extract_date, last_week, WEEK) >= 10 THEN 'Dormant_User'
WHEN max_diff_lag_week >= 10 THEN 'Resurrected_User'
ELSE 'Current_User' END user_type
FROM retain_users
)
GROUP BY user_type
- 주어진 데이터에서 어떤 사람들이 리텐션이 그나마 높을까요? 찾아보세요
, base2 AS (
SELECT
DISTINCT
user_id,
user_pseudo_id,
event_name,
event_week,
MAX(IF(event_key = 'firebase_screen', string_value, null)) AS firebase_screen,
MAX(IF(event_key = 'food_id',int_value, null)) AS food_id,
MAX(IF(event_key = 'session_id', string_value, null)) AS session_id,
MAX(IF(event_key = 'food_category', string_value, null)) AS food_ctgr,
MAX(IF(event_key = 'use_recommend_food', string_value, null)) AS use_recommend_food,
MAX(IF(event_key = 'search_keyword', string_value, null)) AS search_keyword,
platform
FROM base
GROUP BY ALL
), diff_of_week2 AS (
SELECT
*,
DATE_DIFF(event_week, first_week, WEEK) diff_week
FROM (
SELECT
*,
MIN(event_week) OVER(PARTITION BY user_pseudo_id) first_week
FROM base2
)
)
-- 특징 컬럼별 평균 리텐션 구하기
SELECT
use_recommend_food,--
first_week_cnt,
AVG(retention) retention_avg
FROM (
SELECT
use_recommend_food, --event_name / firebase_screen / food_ctgr / use_recommend_food
diff_week,
first_week_cnt,
ROUND(SAFE_DIVIDE(cnt, first_week_cnt),3) retention
FROM (
SELECT
*,
FIRST_VALUE(cnt) OVER(PARTITION BY use_recommend_food ORDER BY diff_week) first_week_cnt
FROM (
SELECT
use_recommend_food,
diff_week,
COUNT(DISTINCT user_pseudo_id) cnt
FROM diff_of_week2
GROUP BY ALL
)
)
)
GROUP BY ALL
ORDER BY retention_avg DESC
--- 검색한 사람 리텐션
SELECT
search_use,--
first_week_cnt,
AVG(retention) retention_avg
FROM (
SELECT
search_use, --event_name / firebase_screen / food_ctgr / use_recommend_food
diff_week,
first_week_cnt,
ROUND(SAFE_DIVIDE(cnt, first_week_cnt),3) retention
FROM (
SELECT
*,
FIRST_VALUE(cnt) OVER(PARTITION BY search_use ORDER BY diff_week) first_week_cnt
FROM (
SELECT
CASE WHEN search_keyword IS NOT NULL THEN 'search_y'
ELSE 'search_n' END search_use,
diff_week,
COUNT(DISTINCT user_pseudo_id) cnt
FROM diff_of_week2
GROUP BY ALL
)
)
)
GROUP BY ALL
ORDER BY retention_avg DESC
'2022-08-01' ~ '2023-01-08' 기간 리텐션 평균 : 0.0934
행 |
event_name |
first_week_cnt |
retention_avg |
|
1 |
click_restaurant_nearby |
1646 |
0.096347826086956509 |
|
2 |
click_search |
6996 |
0.094391304347826055 |
|
3 |
request_search |
6996 |
0.094391304347826055 |
|
4 |
click_banner |
8093 |
0.09408695652173 |
|
행 |
firebase_screen |
first_week_cnt |
retention_avg |
|
1 |
search_result |
6996 |
0.094391304347826055 |
|
2 |
search |
6996 |
0.094391304347826 |
|
행 |
search_use |
first_week_cnt |
retention_avg |
|
1 |
search_y |
6996 |
0.094391304347826055 |
|
2 |
search_n |
51492 |
0.093391304347826082 |
|
0.0934보다 높은 ‘restaurant_nearby’ 클릭한 사람 / 검색을 사용한 사람
- Core Event를 "click_payment"라고 설정하고 Weekly Retention을 구해주세요