1. Weekly Retention을 구하는 쿼리를 바닥부터 스스로 작성해보세요
-- ARRAY 풀기 / 시간 데이터 변환
WITH base AS (
SELECT
DISTINCT
user_id,
user_pseudo_id,
event_name,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week,
event_param.key AS event_key,
event_param.value.string_value AS string_value,
event_param.value.int_value AS int_value,
platform
FROM advanced.app_logs AS a
CROSS JOIN UNNEST(event_params) AS event_param
WHERE event_date BETWEEN '2022-08-01' AND '2023-01-08'
 ), diff_week AS (
    SELECT
      DISTINCT user_pseudo_id,
      DATE_DIFF(event_week, first_week, WEEK) AS diff_of_week
    FROM (
     SELECT
       *,
       MIN(event_week) OVER(PARTITION BY user_pseudo_id ORDER BY event_week) AS first_week
     FROM base
   )
)
 -- retention
  SELECT
    diff_of_week,
    ROUND(SAFE_DIVIDE(cnt, first_week_cnt),3) AS retention_ratio
  FROM (
      SELECT
        *,
        FIRST_VALUE(cnt) OVER(ORDER BY diff_of_week) AS first_week_cnt
      FROM (
          SELECT
            diff_of_week,
            COUNT(DISTINCT user_pseudo_id) cnt
          FROM diff_week
          GROUP BY diff_of_week
      )
  )
  order by diff_of_week
  1. Retain User를 New + Current + Resurrected + Dormant User로 나누는 쿼리를 작성해보세요
 --조건 설정
   --1) 설정 기간이 '2022-08-01' ~ '2023-01-08'까지고, '2023-01-09' 추출한다고 가정
   --2) Retention Curve에서 10주차 이후에 0.06 수준으로 떨어져 10주 이상 미사용 시 이탈로 설정

 --New User : 추출일 기준 최근 4주 이내 첫 사용자
 --Current User : 나머지 사용자
 --Resurrected User : 10주 이상 미사용 후 다시 사용
 --Dormant User : 추출일 기준 10주 이전에 마지막 사용자

  -- 첫 사용주(first_week) / 마지막 사용주(last_week) 구하기
     , first_last_week AS (
     SELECT 
       DISTINCT user_pseudo_id,
       MIN(event_week) OVER(PARTITION BY user_pseudo_id) AS first_week,
       MAX(event_week) OVER(PARTITION BY user_pseudo_id) AS last_week,
       DATE_TRUNC('2023-01-09', WEEK(MONDAY)) AS extract_date
     FROM base
  -- 직전 사용주와의 차이 구하기
    ), diff_of_event_week AS (
     SELECT
       user_pseudo_id,
       DATE_DIFF(event_week, lag_week, WEEK) diff_of_lag_week
     FROM (
          SELECT
            DISTINCT user_pseudo_id,
            event_week,
            LAG(event_week) OVER(PARTITION BY user_pseudo_id ORDER BY event_week) AS lag_week
          FROM base
  )
  ), retain_users AS ( 
    SELECT
      a.user_pseudo_id,
      first_week,
      last_week,
      max_diff_lag_week,
      extract_date
    FROM first_last_week AS a
    INNER JOIN (
        SELECT 
          DISTINCT user_pseudo_id,
          MAX(diff_of_lag_week) OVER(PARTITION BY user_pseudo_id) AS max_diff_lag_week
        FROM diff_of_event_week
    ) AS b ON a.user_pseudo_id = b.user_pseudo_id
     )
     
   -- 유저 타입별 count
  SELECT 
    user_type,
    COUNT(DISTINCT user_pseudo_id) cnt
  FROM (
    SELECT
      user_pseudo_id,
      CASE WHEN DATE_DIFF(extract_date, first_week, WEEK) <= 4 THEN 'New_User'
           WHEN DATE_DIFF(extract_date, last_week, WEEK) >= 10 THEN 'Dormant_User'
           WHEN max_diff_lag_week >= 10 THEN 'Resurrected_User'
      ELSE 'Current_User' END user_type
    FROM retain_users
  )
  GROUP BY user_type
  1. 주어진 데이터에서 어떤 사람들이 리텐션이 그나마 높을까요? 찾아보세요
, base2 AS (
  SELECT
    DISTINCT
    user_id,
    user_pseudo_id,
    event_name,
    event_week,
    MAX(IF(event_key = 'firebase_screen', string_value, null)) AS firebase_screen,
    MAX(IF(event_key = 'food_id',int_value, null)) AS food_id,
    MAX(IF(event_key = 'session_id', string_value, null)) AS session_id,
    MAX(IF(event_key = 'food_category', string_value, null)) AS food_ctgr,
    MAX(IF(event_key = 'use_recommend_food', string_value, null)) AS use_recommend_food,
    MAX(IF(event_key = 'search_keyword', string_value, null)) AS search_keyword,
    platform
  FROM base
  GROUP BY ALL
), diff_of_week2 AS (
SELECT 
  *,
  DATE_DIFF(event_week, first_week, WEEK) diff_week
FROM (
    SELECT
      *,
      MIN(event_week) OVER(PARTITION BY user_pseudo_id) first_week
    FROM base2
)
)
-- 특징 컬럼별 평균 리텐션 구하기
SELECT
  use_recommend_food,--
  first_week_cnt,
  AVG(retention) retention_avg
FROM (
    SELECT
      use_recommend_food, --event_name / firebase_screen / food_ctgr / use_recommend_food
      diff_week,
      first_week_cnt,
      ROUND(SAFE_DIVIDE(cnt, first_week_cnt),3) retention
    FROM (
        SELECT
          *,
          FIRST_VALUE(cnt) OVER(PARTITION BY use_recommend_food ORDER BY diff_week) first_week_cnt
        FROM (
            SELECT 
              use_recommend_food,
              diff_week,
              COUNT(DISTINCT user_pseudo_id) cnt
            FROM diff_of_week2
            GROUP BY ALL
        )
    )
)
GROUP BY ALL
ORDER BY retention_avg DESC

--- 검색한 사람 리텐션
SELECT
  search_use,--
  first_week_cnt,
  AVG(retention) retention_avg
FROM (
    SELECT
      search_use, --event_name / firebase_screen / food_ctgr / use_recommend_food
      diff_week,
      first_week_cnt,
      ROUND(SAFE_DIVIDE(cnt, first_week_cnt),3) retention
    FROM (
        SELECT
          *,
          FIRST_VALUE(cnt) OVER(PARTITION BY search_use ORDER BY diff_week) first_week_cnt
        FROM (
            SELECT 
              CASE WHEN search_keyword IS NOT NULL THEN 'search_y'
              ELSE 'search_n' END search_use,
              diff_week,
              COUNT(DISTINCT user_pseudo_id) cnt
            FROM diff_of_week2
            GROUP BY ALL
        )
    )
)
GROUP BY ALL
ORDER BY retention_avg DESC

'2022-08-01' ~ '2023-01-08' 기간 리텐션 평균 : 0.0934

event_name first_week_cnt retention_avg
1 click_restaurant_nearby 1646 0.096347826086956509
2 click_search 6996 0.094391304347826055
3 request_search 6996 0.094391304347826055
4 click_banner 8093 0.09408695652173
firebase_screen first_week_cnt retention_avg
1 search_result 6996 0.094391304347826055
2 search 6996 0.094391304347826
search_use first_week_cnt retention_avg
1 search_y 6996 0.094391304347826055
2 search_n 51492 0.093391304347826082

0.0934보다 높은 ‘restaurant_nearby’ 클릭한 사람 / 검색을 사용한 사람

  1. Core Event를 "click_payment"라고 설정하고 Weekly Retention을 구해주세요