Составляем профили
- идентификатор пользователя
- начальный момент времени (время появления пользователя)
SELECT p.user_id,
DATE_TRUNC('day', first_ts)::date AS start_cohort,
COUNT(*) OVER (PARTITION BY DATE_TRUNC('day', first_ts)) AS cohort_size
FROM online_store.profiles p;
Собираем таблицу сессий
WITH user_profile AS (SELECT p.user_id,
DATE_TRUNC('day', first_ts)::date AS start_cohort,
COUNT(*) OVER (PARTITION BY DATE_TRUNC('day', first_ts)) AS cohort_size
FROM online_store.profiles p)
SELECT EXTRACT(DAY FROM AGE(DATE_TRUNC('day', o.event_dt), up.start_cohort)) AS lifetime,
o.event_dt AS order_date,
start_cohort,
cohort_size,
revenue
FROM user_profile up
JOIN online_store.orders o ON up.user_id = o.user_id;
-- в результате остаются только записи тех user_id, которые совершили покупки (размер когорты уже посчитан)
Считаем LTV
WITH user_profile AS (SELECT p.user_id,
DATE_TRUNC('day', first_ts)::date AS start_cohort,
COUNT(*) OVER (PARTITION BY DATE_TRUNC('day', first_ts)) AS cohort_size
FROM online_store.profiles p),
sessions AS (SELECT EXTRACT(DAY FROM AGE(DATE_TRUNC('day', o.event_dt), up.start_cohort)) AS lifetime,
o.event_dt AS order_date,
start_cohort,
cohort_size,
revenue
FROM user_profile up
JOIN online_store.orders o ON up.user_id = o.user_id)
SELECT lifetime,
start_cohort,
SUM(revenue) OVER (PARTITION BY start_cohort ORDER BY lifetime) / cohort_size AS ltv
FROM sessions;
--запрос порождает большое количество дублей т.к. мы избавились от order_date
WITH user_profile AS (SELECT p.user_id,
DATE_TRUNC('day', first_ts)::date AS start_cohort,
COUNT(*) OVER (PARTITION BY DATE_TRUNC('day', first_ts)) AS cohort_size
FROM online_store.profiles p),
sessions AS (SELECT EXTRACT(DAY FROM AGE(DATE_TRUNC('day', o.event_dt), up.start_cohort)) AS lifetime,
o.event_dt AS order_date,
start_cohort,
cohort_size,
revenue
FROM user_profile up
JOIN online_store.orders o ON up.user_id = o.user_id),
ltv_raw AS (SELECT lifetime,
start_cohort,
SUM(revenue) OVER (PARTITION BY start_cohort ORDER BY lifetime) / cohort_size AS ltv
FROM sessions)
SELECT lifetime,
start_cohort,
ltv
FROM ltv_raw
GROUP BY lifetime, start_cohort, ltv
ORDER BY start_cohort, lifetime;
Другой пример:
WITH user_profile AS (
SELECT u.user_id,
CAST(DATE_TRUNC('month', u.created_at) AS date) as start_cohort,
COUNT(u.user_id) OVER(PARTITION BY CAST(DATE_TRUNC('month', u.created_at) AS date)) AS cohort_size
FROM tools_shop.users AS u
),
sessions AS (
SELECT EXTRACT(MONTH FROM AGE(
DATE_TRUNC('day', o.created_at),
p.start_cohort
)) AS lifetime,
CAST( DATE_TRUNC('month', o.created_at) AS date) order_date, --FUCK
p.start_cohort,
p.cohort_size,
o.total_amt
FROM user_profile AS p
JOIN tools_shop.orders AS o ON p.user_id = o.user_id
),
ltv_raw AS (
SELECT lifetime,
start_cohort,
cohort_size,
SUM(total_amt) OVER (PARTITION BY start_cohort ORDER BY lifetime) / cohort_size AS ltv
FROM sessions
)
SELECT lifetime,
start_cohort,
ltv
FROM ltv_raw
WHERE EXTRACT(YEAR FROM start_cohort) = 2019
GROUP BY lifetime, start_cohort, ltv
ORDER BY start_cohort, lifetime;
Комментариев нет:
Отправить комментарий