Measure retention
You can use Lakehouse to measure whether users return after their first interaction and how retention changes over time.
This page provides queries for retention in the USAGE_ANALYTICS schema, including activation speed, churn, reactivation, day-N retention, funnel analysis, and cohort retention.
Before you begin
Before you run these queries, make sure:
- Your Lakehouse setup is complete and Lakehouse is enabled for your organization. If setup isn't complete, see Get started with Lakehouse.
Query parameters
These queries use the tables in the USAGE_ANALYTICS schema. For table definitions, see USAGE_ANALYTICS namespace.
Replace the placeholders in each query with values for your environment.
{{DATABASE}}: Snowflake database name{{SCHEMA}}: UseUSAGE_ANALYTICS{{DOMAIN}}: Customer domain, for example'acme.atlan.com'{{START_DATE}}: Start date, for example'2025-01-01'{{END_DATE}}: End date, for example'2025-12-31'{{RETENTION_DAYS}}: Number of days in the retention window, for example14
Activation funnel
Shows how quickly new users (created since {{START_DATE}}) take their first action, with cumulative activation rates at 1, 7, 14, and 30 days after account creation. Grouped by creation month.
- Snowflake
-- activation_funnel.sql
-- New user activation: how quickly do new users take their first action?
-- Shows % activated within 1 day, 7 days, 14 days, 30 days of account creation.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01' (for user creation date filter)
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
users_deduped AS (
SELECT
id,
MAX(role) AS role,
MIN(created_at) AS user_created_at
FROM {{DATABASE}}.{{SCHEMA}}.USERS
GROUP BY id
),
first_activity AS (
SELECT
user_id,
MIN(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', TIMESTAMP)) AS first_event_ts
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.domain = {{DOMAIN}}
)
GROUP BY user_id
),
new_users AS (
SELECT
ud.user_id,
u.role,
u.user_created_at,
DATE_TRUNC('MONTH', u.user_created_at) AS creation_month,
fa.first_event_ts,
DATEDIFF('day', u.user_created_at, fa.first_event_ts) AS days_to_first_action
FROM user_domains ud
INNER JOIN users_deduped u ON u.id = ud.user_id
LEFT JOIN first_activity fa ON fa.user_id = ud.user_id
WHERE ud.domain = {{DOMAIN}}
AND u.user_created_at >= {{START_DATE}}
)
SELECT
creation_month,
COUNT(*) AS total_new_users,
COUNT(CASE WHEN days_to_first_action <= 1 THEN 1 END) AS activated_1d,
COUNT(CASE WHEN days_to_first_action <= 7 THEN 1 END) AS activated_7d,
COUNT(CASE WHEN days_to_first_action <= 14 THEN 1 END) AS activated_14d,
COUNT(CASE WHEN days_to_first_action <= 30 THEN 1 END) AS activated_30d,
COUNT(CASE WHEN days_to_first_action IS NULL THEN 1 END) AS never_activated,
ROUND(100.0 * COUNT(CASE WHEN days_to_first_action <= 1 THEN 1 END) / COUNT(*), 1) AS pct_1d,
ROUND(100.0 * COUNT(CASE WHEN days_to_first_action <= 7 THEN 1 END) / COUNT(*), 1) AS pct_7d,
ROUND(100.0 * COUNT(CASE WHEN days_to_first_action <= 30 THEN 1 END) / COUNT(*), 1) AS pct_30d
FROM new_users
GROUP BY creation_month
ORDER BY creation_month DESC;
Sample output
| CREATION_MONTH | TOTAL_NEW_USERS | ACTIVATED_1D | ACTIVATED_7D | ACTIVATED_14D | ACTIVATED_30D | NEVER_ACTIVATED | PCT_1D | PCT_7D | PCT_30D |
|---|---|---|---|---|---|---|---|---|---|
| 2025-03-01 | 18 | 14 | 16 | 17 | 17 | 1 | 77.8 | 88.9 | 94.4 |
| 2025-02-01 | 12 | 9 | 11 | 11 | 12 | 0 | 75.0 | 91.7 | 100.0 |
| 2025-01-01 | 21 | 15 | 18 | 19 | 20 | 1 | 71.4 | 85.7 | 95.2 |
Churned users
Returns users who were active in the prior month but didn't appear in the current month. Includes email, role, job role, and the month in which they were last active.
- Snowflake
-- churned_users.sql
-- Users active in the prior month but NOT in the current month.
-- Includes last activity date, role, and top features used.
--
-- Parameters:
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
user_months AS (
SELECT DISTINCT
sub.user_id,
DATE_TRUNC('MONTH', CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS activity_month
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.domain = {{DOMAIN}}
) sub
WHERE sub.TIMESTAMP >= DATEADD('month', -3, CURRENT_TIMESTAMP())
),
prev_month_users AS (
SELECT user_id
FROM user_months
WHERE activity_month = DATE_TRUNC('MONTH', DATEADD('month', -1, CURRENT_TIMESTAMP()))
),
curr_month_users AS (
SELECT DISTINCT user_id
FROM user_months
WHERE activity_month = DATE_TRUNC('MONTH', CURRENT_TIMESTAMP())
)
SELECT
p.user_id,
u.email,
u.username,
u.role,
u.job_role,
DATE_TRUNC('MONTH', DATEADD('month', -1, CURRENT_TIMESTAMP())) AS last_active_month
FROM prev_month_users p
LEFT JOIN curr_month_users c ON c.user_id = p.user_id
LEFT JOIN (
SELECT id, MAX(email) AS email, MAX(username) AS username, MAX(role) AS role, MAX(job_role) AS job_role
FROM {{DATABASE}}.{{SCHEMA}}.USERS
GROUP BY id
) u ON u.id = p.user_id
WHERE c.user_id IS NULL
ORDER BY p.user_id;
Sample output
| USER_ID | USERNAME | ROLE | JOB_ROLE | LAST_ACTIVE_MONTH | |
|---|---|---|---|---|---|
| usr_012 | [email protected] | carol | member | Business Analyst | 2025-02-01 |
| usr_031 | [email protected] | dan | member | Data Analyst | 2025-02-01 |
| usr_047 | [email protected] | eve | guest | Viewer | 2025-02-01 |
Reactivated users
Returns users who were inactive for 30 or more days and then returned. Shows the previous active date, reactivation date, and the gap in days.
- Snowflake
-- reactivated_users.sql
-- Users who were inactive for 30+ days and then returned.
-- Shows the gap duration and what they did on return.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
user_activity_days AS (
SELECT DISTINCT
sub.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS activity_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.domain = {{DOMAIN}}
) sub
WHERE sub.TIMESTAMP >= {{START_DATE}}
),
with_gaps AS (
SELECT
user_id,
activity_date,
LAG(activity_date) OVER (PARTITION BY user_id ORDER BY activity_date) AS prev_activity_date,
DATEDIFF('day', LAG(activity_date) OVER (PARTITION BY user_id ORDER BY activity_date), activity_date) AS gap_days
FROM user_activity_days
)
SELECT
g.user_id,
u.email,
u.username,
u.role,
g.prev_activity_date AS last_active_before_gap,
g.activity_date AS reactivation_date,
g.gap_days
FROM with_gaps g
LEFT JOIN (
SELECT id, MAX(email) AS email, MAX(username) AS username, MAX(role) AS role
FROM {{DATABASE}}.{{SCHEMA}}.USERS
GROUP BY id
) u ON u.id = g.user_id
WHERE g.gap_days >= 30
ORDER BY g.activity_date DESC, g.gap_days DESC;
Sample output
| USER_ID | USERNAME | ROLE | LAST_ACTIVE_BEFORE_GAP | REACTIVATION_DATE | GAP_DAYS | |
|---|---|---|---|---|---|---|
| usr_031 | [email protected] | dan | member | 2025-01-12 | 2025-03-04 | 51 |
| usr_047 | [email protected] | eve | guest | 2024-12-20 | 2025-02-18 | 60 |
| usr_008 | [email protected] | frank | member | 2025-01-28 | 2025-03-10 | 41 |
Day-N retention: Session to page view
For each cohort day (users who had any activity on a given date), returns the percentage who viewed a page on day N (0–N retention window). Set {{RETENTION_DAYS}} to control the window size, for example 14.
- Snowflake
-- daily_retention_session_to_pageview.sql
-- Day-N retention: of users who had activity (any event) on a given day,
-- what % visited a page on day N (N=0..13).
-- Uses daily user activity instead of amplitude session IDs.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
-- {{RETENTION_DAYS}} - e.g., 14 (number of days in retention window)
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
-- Users with any activity per day (cohort entry)
activity_days AS (
SELECT DISTINCT
sub.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS cohort_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
) sub
),
-- Users with pageviews per day (return signal)
pageview_days AS (
SELECT DISTINCT
p.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', p.TIMESTAMP)) AS pv_date
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
),
-- Day offsets
day_offsets AS (
SELECT ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1 AS day_n
FROM TABLE(GENERATOR(ROWCOUNT => {{RETENTION_DAYS}}))
),
-- Cohort sizes per day
cohort_sizes AS (
SELECT cohort_date, COUNT(DISTINCT user_id) AS cohort_size
FROM activity_days
GROUP BY cohort_date
),
-- Retention per cohort_date and day_n
retention AS (
SELECT
ad.cohort_date,
d.day_n,
COUNT(DISTINCT CASE WHEN pv.user_id IS NOT NULL THEN ad.user_id END) AS retained_users
FROM activity_days ad
CROSS JOIN day_offsets d
LEFT JOIN pageview_days pv
ON pv.user_id = ad.user_id
AND pv.pv_date = DATEADD('day', d.day_n, ad.cohort_date)
GROUP BY ad.cohort_date, d.day_n
)
SELECT
r.cohort_date,
cs.cohort_size,
r.day_n,
r.retained_users,
ROUND(100.0 * r.retained_users / NULLIF(cs.cohort_size, 0), 1) AS retention_pct
FROM retention r
INNER JOIN cohort_sizes cs ON cs.cohort_date = r.cohort_date
WHERE r.cohort_date <= DATEADD('day', -{{RETENTION_DAYS}}, CURRENT_DATE()) -- Only complete cohorts
ORDER BY r.cohort_date DESC, r.day_n;
Sample output
| COHORT_DATE | COHORT_SIZE | DAY_N | RETAINED_USERS | RETENTION_PCT |
|---|---|---|---|---|
| 2025-03-01 | 52 | 0 | 52 | 100.0 |
| 2025-03-01 | 52 | 1 | 38 | 73.1 |
| 2025-03-01 | 52 | 7 | 29 | 55.8 |
| 2025-03-01 | 52 | 13 | 22 | 42.3 |
Day-N retention: Session to search
For each cohort day, returns the percentage of users who performed a search or AI conversation action on day N. Uses discovery_search_results and atlan_ai_conversation_prompt_submitted events as the return signal.
- Snowflake
-- daily_retention_session_to_search.sql
-- Day-N retention: of users who had activity (any event) on a given day,
-- what % performed a search or AI action on day N (N=0..13).
-- Uses daily user activity instead of amplitude session IDs.
--
-- Search/AI events: discovery_search_results, atlan_ai_conversation_prompt_submitted
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
-- {{RETENTION_DAYS}} - e.g., 14
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
-- Users with any activity per day (cohort entry)
activity_days AS (
SELECT DISTINCT
sub.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS cohort_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
) sub
),
-- Users with search/AI events per day (return signal)
search_days AS (
SELECT DISTINCT
t.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', t.TIMESTAMP)) AS search_date
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND ud.domain = {{DOMAIN}}
AND t.event_text IN (
'discovery_search_results',
'atlan_ai_conversation_prompt_submitted'
)
),
day_offsets AS (
SELECT ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1 AS day_n
FROM TABLE(GENERATOR(ROWCOUNT => {{RETENTION_DAYS}}))
),
cohort_sizes AS (
SELECT cohort_date, COUNT(DISTINCT user_id) AS cohort_size
FROM activity_days
GROUP BY cohort_date
),
retention AS (
SELECT
ad.cohort_date,
d.day_n,
COUNT(DISTINCT CASE WHEN sr.user_id IS NOT NULL THEN ad.user_id END) AS retained_users
FROM activity_days ad
CROSS JOIN day_offsets d
LEFT JOIN search_days sr
ON sr.user_id = ad.user_id
AND sr.search_date = DATEADD('day', d.day_n, ad.cohort_date)
GROUP BY ad.cohort_date, d.day_n
)
SELECT
r.cohort_date,
cs.cohort_size,
r.day_n,
r.retained_users,
ROUND(100.0 * r.retained_users / NULLIF(cs.cohort_size, 0), 1) AS retention_pct
FROM retention r
INNER JOIN cohort_sizes cs ON cs.cohort_date = r.cohort_date
WHERE r.cohort_date <= DATEADD('day', -{{RETENTION_DAYS}}, CURRENT_DATE())
ORDER BY r.cohort_date DESC, r.day_n;
Sample output
| COHORT_DATE | COHORT_SIZE | DAY_N | RETAINED_USERS | RETENTION_PCT |
|---|---|---|---|---|
| 2025-03-01 | 52 | 0 | 41 | 78.8 |
| 2025-03-01 | 52 | 1 | 28 | 53.8 |
| 2025-03-01 | 52 | 7 | 19 | 36.5 |
| 2025-03-01 | 52 | 13 | 14 | 26.9 |
Day-N retention: Session to session
For each cohort day, returns the percentage of users who had any activity again on day N. This is the broadest retention signal. Any return visit counts.
- Snowflake
-- daily_retention_session_to_session.sql
-- Day-N retention: of users who had activity (any event) on a given day,
-- what % had activity again on day N (N=0..13).
-- Uses daily user activity instead of amplitude session IDs.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
-- {{RETENTION_DAYS}} - e.g., 14
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
-- All activity days (used for both cohort entry and return signal)
activity_days AS (
SELECT DISTINCT
sub.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS activity_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
) sub
),
day_offsets AS (
SELECT ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1 AS day_n
FROM TABLE(GENERATOR(ROWCOUNT => {{RETENTION_DAYS}}))
),
cohort_sizes AS (
SELECT activity_date AS cohort_date, COUNT(DISTINCT user_id) AS cohort_size
FROM activity_days
GROUP BY activity_date
),
retention AS (
SELECT
ad.activity_date AS cohort_date,
d.day_n,
COUNT(DISTINCT CASE WHEN ret.user_id IS NOT NULL THEN ad.user_id END) AS retained_users
FROM activity_days ad
CROSS JOIN day_offsets d
LEFT JOIN activity_days ret
ON ret.user_id = ad.user_id
AND ret.activity_date = DATEADD('day', d.day_n, ad.activity_date)
GROUP BY ad.activity_date, d.day_n
)
SELECT
r.cohort_date,
cs.cohort_size,
r.day_n,
r.retained_users,
ROUND(100.0 * r.retained_users / NULLIF(cs.cohort_size, 0), 1) AS retention_pct
FROM retention r
INNER JOIN cohort_sizes cs ON cs.cohort_date = r.cohort_date
WHERE r.cohort_date <= DATEADD('day', -{{RETENTION_DAYS}}, CURRENT_DATE())
ORDER BY r.cohort_date DESC, r.day_n;
Sample output
| COHORT_DATE | COHORT_SIZE | DAY_N | RETAINED_USERS | RETENTION_PCT |
|---|---|---|---|---|
| 2025-03-01 | 52 | 0 | 52 | 100.0 |
| 2025-03-01 | 52 | 1 | 44 | 84.6 |
| 2025-03-01 | 52 | 7 | 35 | 67.3 |
| 2025-03-01 | 52 | 13 | 28 | 53.8 |
Funnel: Session to page view
A three-step funnel showing the conversion from active user to first pageview to two or more pageviews. Also splits step 2 and step 3 by whether the user performed a governance action in the same period.
- Snowflake
-- funnel_session_to_pageview.sql
-- Multi-step funnel analysis: active user -> pageview -> deeper engagement.
-- Uses "user had any event" instead of amplitude session checks.
--
-- Steps:
-- Step 1: User had any event (track or page)
-- Step 2: User viewed a page
-- Step 3: User viewed 2+ pages (optional)
--
-- Governance split: splits step 2+ by whether the user also
-- performed a governance action in the same period.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{END_DATE}} - e.g., '2025-12-31'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
-- Step 1: Users with at least one event (any activity)
active_users AS (
SELECT DISTINCT sub.user_id
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}} AND t.TIMESTAMP < {{END_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}} AND p.TIMESTAMP < {{END_DATE}}
AND p.domain = {{DOMAIN}}
) sub
),
-- Step 2: Users with at least one pageview
pageview_counts AS (
SELECT
p.user_id,
COUNT(*) AS pv_count
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}} AND p.TIMESTAMP < {{END_DATE}}
AND p.domain = {{DOMAIN}}
GROUP BY p.user_id
),
-- Governance action flag per user
governance_users AS (
SELECT DISTINCT t.user_id
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}} AND t.TIMESTAMP < {{END_DATE}}
AND ud.domain = {{DOMAIN}}
AND (
t.event_text LIKE 'governance_%'
OR t.event_text LIKE 'gtc_tree_create_%'
OR t.event_text LIKE 'asset_update_%'
)
),
-- Funnel summary
funnel AS (
SELECT
'Step 1: Active User' AS step_name,
1 AS step_order,
COUNT(DISTINCT au.user_id) AS total_users,
NULL AS with_governance,
NULL AS without_governance
FROM active_users au
UNION ALL
SELECT
'Step 2: Pageview' AS step_name,
2 AS step_order,
COUNT(DISTINCT au.user_id) AS total_users,
COUNT(DISTINCT CASE WHEN gu.user_id IS NOT NULL THEN au.user_id END) AS with_governance,
COUNT(DISTINCT CASE WHEN gu.user_id IS NULL THEN au.user_id END) AS without_governance
FROM active_users au
INNER JOIN pageview_counts pc ON pc.user_id = au.user_id AND pc.pv_count >= 1
LEFT JOIN governance_users gu ON gu.user_id = au.user_id
UNION ALL
SELECT
'Step 3: 2+ Pageviews' AS step_name,
3 AS step_order,
COUNT(DISTINCT au.user_id) AS total_users,
COUNT(DISTINCT CASE WHEN gu.user_id IS NOT NULL THEN au.user_id END) AS with_governance,
COUNT(DISTINCT CASE WHEN gu.user_id IS NULL THEN au.user_id END) AS without_governance
FROM active_users au
INNER JOIN pageview_counts pc ON pc.user_id = au.user_id AND pc.pv_count >= 2
LEFT JOIN governance_users gu ON gu.user_id = au.user_id
)
SELECT
step_name,
step_order,
total_users,
with_governance,
without_governance,
ROUND(100.0 * total_users / NULLIF(FIRST_VALUE(total_users) OVER (ORDER BY step_order), 0), 1) AS conversion_from_step1_pct,
ROUND(100.0 * total_users / NULLIF(LAG(total_users) OVER (ORDER BY step_order), 0), 1) AS conversion_from_prev_pct
FROM funnel
ORDER BY step_order;
Sample output
| STEP_NAME | STEP_ORDER | TOTAL_USERS | WITH_GOVERNANCE | WITHOUT_GOVERNANCE | CONVERSION_FROM_STEP1_PCT | CONVERSION_FROM_PREV_PCT |
|---|---|---|---|---|---|---|
| Step 1: Active User | 1 | 124 | null | null | 100.0 | null |
| Step 2: Pageview | 2 | 118 | 38 | 80 | 95.2 | 95.2 |
| Step 3: 2+ Pageviews | 3 | 104 | 34 | 70 | 83.9 | 88.1 |
Monthly retention cohort
Builds a triangular cohort retention matrix. Each row represents a monthly cohort (users first seen in that month) and columns show how many returned in month 1, 2, 3, and later months.
- Snowflake
-- monthly_retention_cohort.sql
-- Cohort retention: what % of users who first appeared in month X returned in month X+1, X+2, etc.
-- Output is a triangular retention matrix.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com' (or replace filter with 1=1 for all)
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
user_months AS (
SELECT DISTINCT
sub.user_id,
ud.domain,
DATE_TRUNC('MONTH', CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS activity_month
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud2 ON ud2.user_id = t.user_id
WHERE t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud2.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.domain = {{DOMAIN}}
) sub
INNER JOIN user_domains ud ON ud.user_id = sub.user_id
WHERE sub.TIMESTAMP >= {{START_DATE}}
),
cohorts AS (
SELECT
user_id,
domain,
MIN(activity_month) AS cohort_month
FROM user_months
GROUP BY user_id, domain
),
retention AS (
SELECT
c.cohort_month,
DATEDIFF('month', c.cohort_month, um.activity_month) AS months_since_start,
COUNT(DISTINCT um.user_id) AS active_users
FROM cohorts c
INNER JOIN user_months um ON um.user_id = c.user_id AND um.domain = c.domain
GROUP BY c.cohort_month, months_since_start
),
cohort_sizes AS (
SELECT cohort_month, COUNT(DISTINCT user_id) AS cohort_size
FROM cohorts
GROUP BY cohort_month
)
SELECT
r.cohort_month,
cs.cohort_size,
r.months_since_start,
r.active_users,
ROUND(100.0 * r.active_users / cs.cohort_size, 1) AS retention_pct
FROM retention r
JOIN cohort_sizes cs ON cs.cohort_month = r.cohort_month
ORDER BY r.cohort_month, r.months_since_start;
Sample output
| COHORT_MONTH | COHORT_SIZE | MONTHS_SINCE_START | ACTIVE_USERS | RETENTION_PCT |
|---|---|---|---|---|
| 2025-01-01 | 87 | 0 | 87 | 100.0 |
| 2025-01-01 | 87 | 1 | 64 | 73.6 |
| 2025-01-01 | 87 | 2 | 58 | 66.7 |
| 2025-02-01 | 31 | 0 | 31 | 100.0 |
| 2025-02-01 | 31 | 1 | 24 | 77.4 |
Aggregate retention rate
Returns a per-week aggregate retention rate: of all users who had any activity in a given week, what percentage also had a page view within 7 days of their first activity. Provides a single high-level retention signal without per-cohort granularity.
- Snowflake
-- retention_rate_aggregate.sql
-- Aggregate retention rate: of users with any activity, what % had a pageview
-- within 7 days? Returns a single per-week summary (not per-cohort).
-- Uses daily user activity instead of amplitude session IDs.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),
-- First activity date per user (entry point)
first_activity AS (
SELECT
sub.user_id,
MIN(DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP))) AS first_activity_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
) sub
GROUP BY sub.user_id
),
-- First pageview within 7 days of first activity
pageview_within_7d AS (
SELECT
fa.user_id,
fa.first_activity_date,
MIN(DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', p.TIMESTAMP))) AS first_pv_date
FROM first_activity fa
INNER JOIN {{DATABASE}}.{{SCHEMA}}.PAGES p
ON p.user_id = fa.user_id
AND DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', p.TIMESTAMP))
BETWEEN fa.first_activity_date AND DATEADD('day', 7, fa.first_activity_date)
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
GROUP BY fa.user_id, fa.first_activity_date
)
SELECT
DATE_TRUNC('WEEK', fa.first_activity_date) AS cohort_week,
COUNT(DISTINCT fa.user_id) AS users_with_activity,
COUNT(DISTINCT pv.user_id) AS users_with_pageview_7d,
ROUND(100.0 * COUNT(DISTINCT pv.user_id) / NULLIF(COUNT(DISTINCT fa.user_id), 0), 1) AS retention_rate_pct
FROM first_activity fa
LEFT JOIN pageview_within_7d pv ON pv.user_id = fa.user_id
WHERE fa.first_activity_date <= DATEADD('day', -7, CURRENT_DATE()) -- Only complete 7-day windows
GROUP BY cohort_week
ORDER BY cohort_week DESC;
Sample output
| COHORT_WEEK | USERS_WITH_ACTIVITY | USERS_WITH_PAGEVIEW_7D | RETENTION_RATE_PCT |
|---|---|---|---|
| 2025-03-10 | 89 | 76 | 85.4 |
| 2025-03-03 | 94 | 81 | 86.2 |
| 2025-02-24 | 81 | 67 | 82.7 |