Допустим мы имеем таблицу с профилями пользователей. Профилем называется таблица, где каждому пользователю соответствует метка, которая указывает, к какой когорте относится пользователь.
Профили могут быть получены из журнала посещений. Для каждого идентификатора пользователя нужно назначить когорту, дату первого посещения сайта.
WITH profile AS (SELECT user_id,
dt AS cohort
FROM online_store.profiles
WHERE channel = 'Organic'),
pr_session AS (SELECT user_id,
session_start::date AS session_date
FROM online_store.sessions)
SELECT *
FROM profile JOIN pr_session ON profile.user_id = pr_session.user_id;
Определяем количество пользователей на каждую дату и на каждую когорту
WITH profile AS (SELECT user_id,
dt AS cohort
FROM online_store.profiles
WHERE channel = 'Organic'),
pr_session AS (SELECT user_id,
session_start::date AS session_date
FROM online_store.sessions)
SELECT cohort,
session_date,
COUNT(DISTINCT pr_session.user_id) AS cnt_user
FROM profile JOIN pr_session ON profile.user_id = pr_session.user_id
GROUP BY cohort, session_date;
WITH profile AS (SELECT user_id,
dt AS cohort
FROM online_store.profiles
WHERE channel = 'Organic'),
pr_session AS (SELECT user_id,
session_start::date AS cnt_date
FROM online_store.sessions),
cohorts AS (SELECT cohort,
cnt_date,
COUNT(DISTINCT profile.user_id) AS cnt_user
FROM profile JOIN pr_session ON profile.user_id = pr_session.user_id
GROUP BY cohort, cnt_date)
SELECT *,
MAX(cnt_user) OVER (PARTITION BY cohort) AS cnt_user_in_cohort,
ROUND(cnt_user::numeric/MAX(cnt_user) OVER (PARTITION BY cohort)::numeric,3)
FROM cohorts;
Другой пример:
Первым событием возьмем первую покупку
WITH
first_ts AS (
SELECT o.user_id,
o.paid_at,
MIN(o.paid_at) OVER(PARTITION BY o.user_id) AS first_dt
FROM tools_shop.orders AS o
),
cohorts AS (
SELECT first_ts.user_id,
CAST(DATE_TRUNC('month', first_ts.paid_at) AS date) AS paid_at,
CAST(DATE_TRUNC('month', first_ts.first_dt) AS date) AS cohort
FROM first_ts
WHERE EXTRACT(YEAR FROM first_ts.first_dt) = 2020
),
users AS (
SELECT cohorts.cohort,
cohorts.paid_at,
CAST(COUNT(DISTINCT cohorts.user_id) AS numeric) AS users
FROM cohorts
GROUP BY cohorts.cohort, cohorts.paid_at
)
SELECT users.cohort,
users.paid_at,
ROUND(users.users / MAX(users.users) OVER(PARTITION BY users.cohort), 4)*100 AS retention
FROM users;
Комментариев нет:
Отправить комментарий