Skip to main content

Measure retention

You can use Lakehouse to measure whether users return after their first interaction and how retention changes over time. This page provides queries for retention in the USAGE_ANALYTICS schema, including activation speed, churn, reactivation, day-N retention, funnel analysis, and cohort retention.

Before you begin

Before you run these queries, make sure:

  • Your Lakehouse setup is complete and Lakehouse is enabled for your organization. If setup isn't complete, see Get started with Lakehouse.

Query parameters

These queries use the tables in the USAGE_ANALYTICS schema. For table definitions, see USAGE_ANALYTICS namespace. Replace the placeholders in each query with values for your environment.

  • {{DATABASE}}: Snowflake database name
  • {{SCHEMA}}: Use USAGE_ANALYTICS
  • {{DOMAIN}}: Customer domain, for example 'acme.atlan.com'
  • {{START_DATE}}: Start date, for example '2025-01-01'
  • {{END_DATE}}: End date, for example '2025-12-31'
  • {{RETENTION_DAYS}}: Number of days in the retention window, for example 14

Activation funnel

Shows how quickly new users (created since {{START_DATE}}) take their first action, with cumulative activation rates at 1, 7, 14, and 30 days after account creation. Grouped by creation month.

-- activation_funnel.sql
-- New user activation: how quickly do new users take their first action?
-- Shows % activated within 1 day, 7 days, 14 days, 30 days of account creation.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01' (for user creation date filter)
-- {{DOMAIN}} - e.g., 'acme.atlan.com'

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

users_deduped AS (
SELECT
id,
MAX(role) AS role,
MIN(created_at) AS user_created_at
FROM {{DATABASE}}.{{SCHEMA}}.USERS
GROUP BY id
),

first_activity AS (
SELECT
user_id,
MIN(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', TIMESTAMP)) AS first_event_ts
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.domain = {{DOMAIN}}
)
GROUP BY user_id
),

new_users AS (
SELECT
ud.user_id,
u.role,
u.user_created_at,
DATE_TRUNC('MONTH', u.user_created_at) AS creation_month,
fa.first_event_ts,
DATEDIFF('day', u.user_created_at, fa.first_event_ts) AS days_to_first_action
FROM user_domains ud
INNER JOIN users_deduped u ON u.id = ud.user_id
LEFT JOIN first_activity fa ON fa.user_id = ud.user_id
WHERE ud.domain = {{DOMAIN}}
AND u.user_created_at >= {{START_DATE}}
)

SELECT
creation_month,
COUNT(*) AS total_new_users,
COUNT(CASE WHEN days_to_first_action <= 1 THEN 1 END) AS activated_1d,
COUNT(CASE WHEN days_to_first_action <= 7 THEN 1 END) AS activated_7d,
COUNT(CASE WHEN days_to_first_action <= 14 THEN 1 END) AS activated_14d,
COUNT(CASE WHEN days_to_first_action <= 30 THEN 1 END) AS activated_30d,
COUNT(CASE WHEN days_to_first_action IS NULL THEN 1 END) AS never_activated,
ROUND(100.0 * COUNT(CASE WHEN days_to_first_action <= 1 THEN 1 END) / COUNT(*), 1) AS pct_1d,
ROUND(100.0 * COUNT(CASE WHEN days_to_first_action <= 7 THEN 1 END) / COUNT(*), 1) AS pct_7d,
ROUND(100.0 * COUNT(CASE WHEN days_to_first_action <= 30 THEN 1 END) / COUNT(*), 1) AS pct_30d
FROM new_users
GROUP BY creation_month
ORDER BY creation_month DESC;
Sample output
CREATION_MONTHTOTAL_NEW_USERSACTIVATED_1DACTIVATED_7DACTIVATED_14DACTIVATED_30DNEVER_ACTIVATEDPCT_1DPCT_7DPCT_30D
2025-03-011814161717177.888.994.4
2025-02-01129111112075.091.7100.0
2025-01-012115181920171.485.795.2

Churned users

Returns users who were active in the prior month but didn't appear in the current month. Includes email, role, job role, and the month in which they were last active.

-- churned_users.sql
-- Users active in the prior month but NOT in the current month.
-- Includes last activity date, role, and top features used.
--
-- Parameters:
-- {{DOMAIN}} - e.g., 'acme.atlan.com'

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

user_months AS (
SELECT DISTINCT
sub.user_id,
DATE_TRUNC('MONTH', CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS activity_month
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.domain = {{DOMAIN}}
) sub
WHERE sub.TIMESTAMP >= DATEADD('month', -3, CURRENT_TIMESTAMP())
),

prev_month_users AS (
SELECT user_id
FROM user_months
WHERE activity_month = DATE_TRUNC('MONTH', DATEADD('month', -1, CURRENT_TIMESTAMP()))
),

curr_month_users AS (
SELECT DISTINCT user_id
FROM user_months
WHERE activity_month = DATE_TRUNC('MONTH', CURRENT_TIMESTAMP())
)

SELECT
p.user_id,
u.email,
u.username,
u.role,
u.job_role,
DATE_TRUNC('MONTH', DATEADD('month', -1, CURRENT_TIMESTAMP())) AS last_active_month
FROM prev_month_users p
LEFT JOIN curr_month_users c ON c.user_id = p.user_id
LEFT JOIN (
SELECT id, MAX(email) AS email, MAX(username) AS username, MAX(role) AS role, MAX(job_role) AS job_role
FROM {{DATABASE}}.{{SCHEMA}}.USERS
GROUP BY id
) u ON u.id = p.user_id
WHERE c.user_id IS NULL
ORDER BY p.user_id;
Sample output
USER_IDEMAILUSERNAMEROLEJOB_ROLELAST_ACTIVE_MONTH
usr_012[email protected]carolmemberBusiness Analyst2025-02-01
usr_031[email protected]danmemberData Analyst2025-02-01
usr_047[email protected]eveguestViewer2025-02-01

Reactivated users

Returns users who were inactive for 30 or more days and then returned. Shows the previous active date, reactivation date, and the gap in days.

-- reactivated_users.sql
-- Users who were inactive for 30+ days and then returned.
-- Shows the gap duration and what they did on return.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

user_activity_days AS (
SELECT DISTINCT
sub.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS activity_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.domain = {{DOMAIN}}
) sub
WHERE sub.TIMESTAMP >= {{START_DATE}}
),

with_gaps AS (
SELECT
user_id,
activity_date,
LAG(activity_date) OVER (PARTITION BY user_id ORDER BY activity_date) AS prev_activity_date,
DATEDIFF('day', LAG(activity_date) OVER (PARTITION BY user_id ORDER BY activity_date), activity_date) AS gap_days
FROM user_activity_days
)

SELECT
g.user_id,
u.email,
u.username,
u.role,
g.prev_activity_date AS last_active_before_gap,
g.activity_date AS reactivation_date,
g.gap_days
FROM with_gaps g
LEFT JOIN (
SELECT id, MAX(email) AS email, MAX(username) AS username, MAX(role) AS role
FROM {{DATABASE}}.{{SCHEMA}}.USERS
GROUP BY id
) u ON u.id = g.user_id
WHERE g.gap_days >= 30
ORDER BY g.activity_date DESC, g.gap_days DESC;
Sample output
USER_IDEMAILUSERNAMEROLELAST_ACTIVE_BEFORE_GAPREACTIVATION_DATEGAP_DAYS
usr_031[email protected]danmember2025-01-122025-03-0451
usr_047[email protected]eveguest2024-12-202025-02-1860
usr_008[email protected]frankmember2025-01-282025-03-1041

Day-N retention: Session to page view

For each cohort day (users who had any activity on a given date), returns the percentage who viewed a page on day N (0–N retention window). Set {{RETENTION_DAYS}} to control the window size, for example 14.

-- daily_retention_session_to_pageview.sql
-- Day-N retention: of users who had activity (any event) on a given day,
-- what % visited a page on day N (N=0..13).
-- Uses daily user activity instead of amplitude session IDs.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
-- {{RETENTION_DAYS}} - e.g., 14 (number of days in retention window)

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

-- Users with any activity per day (cohort entry)
activity_days AS (
SELECT DISTINCT
sub.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS cohort_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
) sub
),

-- Users with pageviews per day (return signal)
pageview_days AS (
SELECT DISTINCT
p.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', p.TIMESTAMP)) AS pv_date
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
),

-- Day offsets
day_offsets AS (
SELECT ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1 AS day_n
FROM TABLE(GENERATOR(ROWCOUNT => {{RETENTION_DAYS}}))
),

-- Cohort sizes per day
cohort_sizes AS (
SELECT cohort_date, COUNT(DISTINCT user_id) AS cohort_size
FROM activity_days
GROUP BY cohort_date
),

-- Retention per cohort_date and day_n
retention AS (
SELECT
ad.cohort_date,
d.day_n,
COUNT(DISTINCT CASE WHEN pv.user_id IS NOT NULL THEN ad.user_id END) AS retained_users
FROM activity_days ad
CROSS JOIN day_offsets d
LEFT JOIN pageview_days pv
ON pv.user_id = ad.user_id
AND pv.pv_date = DATEADD('day', d.day_n, ad.cohort_date)
GROUP BY ad.cohort_date, d.day_n
)

SELECT
r.cohort_date,
cs.cohort_size,
r.day_n,
r.retained_users,
ROUND(100.0 * r.retained_users / NULLIF(cs.cohort_size, 0), 1) AS retention_pct
FROM retention r
INNER JOIN cohort_sizes cs ON cs.cohort_date = r.cohort_date
WHERE r.cohort_date <= DATEADD('day', -{{RETENTION_DAYS}}, CURRENT_DATE()) -- Only complete cohorts
ORDER BY r.cohort_date DESC, r.day_n;
Sample output
COHORT_DATECOHORT_SIZEDAY_NRETAINED_USERSRETENTION_PCT
2025-03-0152052100.0
2025-03-015213873.1
2025-03-015272955.8
2025-03-0152132242.3

For each cohort day, returns the percentage of users who performed a search or AI conversation action on day N. Uses discovery_search_results and atlan_ai_conversation_prompt_submitted events as the return signal.

-- daily_retention_session_to_search.sql
-- Day-N retention: of users who had activity (any event) on a given day,
-- what % performed a search or AI action on day N (N=0..13).
-- Uses daily user activity instead of amplitude session IDs.
--
-- Search/AI events: discovery_search_results, atlan_ai_conversation_prompt_submitted
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
-- {{RETENTION_DAYS}} - e.g., 14

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

-- Users with any activity per day (cohort entry)
activity_days AS (
SELECT DISTINCT
sub.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS cohort_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
) sub
),

-- Users with search/AI events per day (return signal)
search_days AS (
SELECT DISTINCT
t.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', t.TIMESTAMP)) AS search_date
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND ud.domain = {{DOMAIN}}
AND t.event_text IN (
'discovery_search_results',
'atlan_ai_conversation_prompt_submitted'
)
),

day_offsets AS (
SELECT ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1 AS day_n
FROM TABLE(GENERATOR(ROWCOUNT => {{RETENTION_DAYS}}))
),

cohort_sizes AS (
SELECT cohort_date, COUNT(DISTINCT user_id) AS cohort_size
FROM activity_days
GROUP BY cohort_date
),

retention AS (
SELECT
ad.cohort_date,
d.day_n,
COUNT(DISTINCT CASE WHEN sr.user_id IS NOT NULL THEN ad.user_id END) AS retained_users
FROM activity_days ad
CROSS JOIN day_offsets d
LEFT JOIN search_days sr
ON sr.user_id = ad.user_id
AND sr.search_date = DATEADD('day', d.day_n, ad.cohort_date)
GROUP BY ad.cohort_date, d.day_n
)

SELECT
r.cohort_date,
cs.cohort_size,
r.day_n,
r.retained_users,
ROUND(100.0 * r.retained_users / NULLIF(cs.cohort_size, 0), 1) AS retention_pct
FROM retention r
INNER JOIN cohort_sizes cs ON cs.cohort_date = r.cohort_date
WHERE r.cohort_date <= DATEADD('day', -{{RETENTION_DAYS}}, CURRENT_DATE())
ORDER BY r.cohort_date DESC, r.day_n;
Sample output
COHORT_DATECOHORT_SIZEDAY_NRETAINED_USERSRETENTION_PCT
2025-03-015204178.8
2025-03-015212853.8
2025-03-015271936.5
2025-03-0152131426.9

Day-N retention: Session to session

For each cohort day, returns the percentage of users who had any activity again on day N. This is the broadest retention signal. Any return visit counts.

-- daily_retention_session_to_session.sql
-- Day-N retention: of users who had activity (any event) on a given day,
-- what % had activity again on day N (N=0..13).
-- Uses daily user activity instead of amplitude session IDs.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'
-- {{RETENTION_DAYS}} - e.g., 14

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

-- All activity days (used for both cohort entry and return signal)
activity_days AS (
SELECT DISTINCT
sub.user_id,
DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS activity_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
) sub
),

day_offsets AS (
SELECT ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1 AS day_n
FROM TABLE(GENERATOR(ROWCOUNT => {{RETENTION_DAYS}}))
),

cohort_sizes AS (
SELECT activity_date AS cohort_date, COUNT(DISTINCT user_id) AS cohort_size
FROM activity_days
GROUP BY activity_date
),

retention AS (
SELECT
ad.activity_date AS cohort_date,
d.day_n,
COUNT(DISTINCT CASE WHEN ret.user_id IS NOT NULL THEN ad.user_id END) AS retained_users
FROM activity_days ad
CROSS JOIN day_offsets d
LEFT JOIN activity_days ret
ON ret.user_id = ad.user_id
AND ret.activity_date = DATEADD('day', d.day_n, ad.activity_date)
GROUP BY ad.activity_date, d.day_n
)

SELECT
r.cohort_date,
cs.cohort_size,
r.day_n,
r.retained_users,
ROUND(100.0 * r.retained_users / NULLIF(cs.cohort_size, 0), 1) AS retention_pct
FROM retention r
INNER JOIN cohort_sizes cs ON cs.cohort_date = r.cohort_date
WHERE r.cohort_date <= DATEADD('day', -{{RETENTION_DAYS}}, CURRENT_DATE())
ORDER BY r.cohort_date DESC, r.day_n;
Sample output
COHORT_DATECOHORT_SIZEDAY_NRETAINED_USERSRETENTION_PCT
2025-03-0152052100.0
2025-03-015214484.6
2025-03-015273567.3
2025-03-0152132853.8

Funnel: Session to page view

A three-step funnel showing the conversion from active user to first pageview to two or more pageviews. Also splits step 2 and step 3 by whether the user performed a governance action in the same period.

-- funnel_session_to_pageview.sql
-- Multi-step funnel analysis: active user -> pageview -> deeper engagement.
-- Uses "user had any event" instead of amplitude session checks.
--
-- Steps:
-- Step 1: User had any event (track or page)
-- Step 2: User viewed a page
-- Step 3: User viewed 2+ pages (optional)
--
-- Governance split: splits step 2+ by whether the user also
-- performed a governance action in the same period.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{END_DATE}} - e.g., '2025-12-31'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

-- Step 1: Users with at least one event (any activity)
active_users AS (
SELECT DISTINCT sub.user_id
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}} AND t.TIMESTAMP < {{END_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}} AND p.TIMESTAMP < {{END_DATE}}
AND p.domain = {{DOMAIN}}
) sub
),

-- Step 2: Users with at least one pageview
pageview_counts AS (
SELECT
p.user_id,
COUNT(*) AS pv_count
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}} AND p.TIMESTAMP < {{END_DATE}}
AND p.domain = {{DOMAIN}}
GROUP BY p.user_id
),

-- Governance action flag per user
governance_users AS (
SELECT DISTINCT t.user_id
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}} AND t.TIMESTAMP < {{END_DATE}}
AND ud.domain = {{DOMAIN}}
AND (
t.event_text LIKE 'governance_%'
OR t.event_text LIKE 'gtc_tree_create_%'
OR t.event_text LIKE 'asset_update_%'
)
),

-- Funnel summary
funnel AS (
SELECT
'Step 1: Active User' AS step_name,
1 AS step_order,
COUNT(DISTINCT au.user_id) AS total_users,
NULL AS with_governance,
NULL AS without_governance
FROM active_users au

UNION ALL

SELECT
'Step 2: Pageview' AS step_name,
2 AS step_order,
COUNT(DISTINCT au.user_id) AS total_users,
COUNT(DISTINCT CASE WHEN gu.user_id IS NOT NULL THEN au.user_id END) AS with_governance,
COUNT(DISTINCT CASE WHEN gu.user_id IS NULL THEN au.user_id END) AS without_governance
FROM active_users au
INNER JOIN pageview_counts pc ON pc.user_id = au.user_id AND pc.pv_count >= 1
LEFT JOIN governance_users gu ON gu.user_id = au.user_id

UNION ALL

SELECT
'Step 3: 2+ Pageviews' AS step_name,
3 AS step_order,
COUNT(DISTINCT au.user_id) AS total_users,
COUNT(DISTINCT CASE WHEN gu.user_id IS NOT NULL THEN au.user_id END) AS with_governance,
COUNT(DISTINCT CASE WHEN gu.user_id IS NULL THEN au.user_id END) AS without_governance
FROM active_users au
INNER JOIN pageview_counts pc ON pc.user_id = au.user_id AND pc.pv_count >= 2
LEFT JOIN governance_users gu ON gu.user_id = au.user_id
)

SELECT
step_name,
step_order,
total_users,
with_governance,
without_governance,
ROUND(100.0 * total_users / NULLIF(FIRST_VALUE(total_users) OVER (ORDER BY step_order), 0), 1) AS conversion_from_step1_pct,
ROUND(100.0 * total_users / NULLIF(LAG(total_users) OVER (ORDER BY step_order), 0), 1) AS conversion_from_prev_pct
FROM funnel
ORDER BY step_order;
Sample output
STEP_NAMESTEP_ORDERTOTAL_USERSWITH_GOVERNANCEWITHOUT_GOVERNANCECONVERSION_FROM_STEP1_PCTCONVERSION_FROM_PREV_PCT
Step 1: Active User1124nullnull100.0null
Step 2: Pageview2118388095.295.2
Step 3: 2+ Pageviews3104347083.988.1

Monthly retention cohort

Builds a triangular cohort retention matrix. Each row represents a monthly cohort (users first seen in that month) and columns show how many returned in month 1, 2, 3, and later months.

-- monthly_retention_cohort.sql
-- Cohort retention: what % of users who first appeared in month X returned in month X+1, X+2, etc.
-- Output is a triangular retention matrix.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com' (or replace filter with 1=1 for all)

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

user_months AS (
SELECT DISTINCT
sub.user_id,
ud.domain,
DATE_TRUNC('MONTH', CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP)) AS activity_month
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud2 ON ud2.user_id = t.user_id
WHERE t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud2.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.domain = {{DOMAIN}}
) sub
INNER JOIN user_domains ud ON ud.user_id = sub.user_id
WHERE sub.TIMESTAMP >= {{START_DATE}}
),

cohorts AS (
SELECT
user_id,
domain,
MIN(activity_month) AS cohort_month
FROM user_months
GROUP BY user_id, domain
),

retention AS (
SELECT
c.cohort_month,
DATEDIFF('month', c.cohort_month, um.activity_month) AS months_since_start,
COUNT(DISTINCT um.user_id) AS active_users
FROM cohorts c
INNER JOIN user_months um ON um.user_id = c.user_id AND um.domain = c.domain
GROUP BY c.cohort_month, months_since_start
),

cohort_sizes AS (
SELECT cohort_month, COUNT(DISTINCT user_id) AS cohort_size
FROM cohorts
GROUP BY cohort_month
)

SELECT
r.cohort_month,
cs.cohort_size,
r.months_since_start,
r.active_users,
ROUND(100.0 * r.active_users / cs.cohort_size, 1) AS retention_pct
FROM retention r
JOIN cohort_sizes cs ON cs.cohort_month = r.cohort_month
ORDER BY r.cohort_month, r.months_since_start;
Sample output
COHORT_MONTHCOHORT_SIZEMONTHS_SINCE_STARTACTIVE_USERSRETENTION_PCT
2025-01-0187087100.0
2025-01-018716473.6
2025-01-018725866.7
2025-02-0131031100.0
2025-02-013112477.4

Aggregate retention rate

Returns a per-week aggregate retention rate: of all users who had any activity in a given week, what percentage also had a page view within 7 days of their first activity. Provides a single high-level retention signal without per-cohort granularity.

-- retention_rate_aggregate.sql
-- Aggregate retention rate: of users with any activity, what % had a pageview
-- within 7 days? Returns a single per-week summary (not per-cohort).
-- Uses daily user activity instead of amplitude session IDs.
--
-- Parameters:
-- {{START_DATE}} - e.g., '2025-01-01'
-- {{DOMAIN}} - e.g., 'acme.atlan.com'

WITH user_domains AS (
SELECT user_id, MAX(domain) AS domain
FROM {{DATABASE}}.{{SCHEMA}}.PAGES
WHERE domain IS NOT NULL
GROUP BY user_id
),

-- First activity date per user (entry point)
first_activity AS (
SELECT
sub.user_id,
MIN(DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', sub.TIMESTAMP))) AS first_activity_date
FROM (
SELECT t.user_id, t.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.TRACKS t
INNER JOIN user_domains ud ON ud.user_id = t.user_id
WHERE t.TIMESTAMP >= {{START_DATE}}
AND t.event_text NOT IN (
'atlan_analaytics_aggregateinfo_fetch',
'api_error_emit', 'api_evaluator_cancelled', 'api_evaluator_succeeded',
'Experiment Started', '$experiment_started',
'web_vital_metric_inp_track', 'web_vital_metric_ttfb_track',
'performance_metric_user_timing_discovery_search',
'performance_metric_user_timing_app_bootstrap',
'web_vital_metric_fcp_track', 'web_vital_metric_lcp_track'
)
AND t.event_text NOT LIKE 'workflow_%'
AND ud.domain = {{DOMAIN}}
UNION ALL
SELECT p.user_id, p.TIMESTAMP
FROM {{DATABASE}}.{{SCHEMA}}.PAGES p
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
) sub
GROUP BY sub.user_id
),

-- First pageview within 7 days of first activity
pageview_within_7d AS (
SELECT
fa.user_id,
fa.first_activity_date,
MIN(DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', p.TIMESTAMP))) AS first_pv_date
FROM first_activity fa
INNER JOIN {{DATABASE}}.{{SCHEMA}}.PAGES p
ON p.user_id = fa.user_id
AND DATE(CONVERT_TIMEZONE('UTC', 'Asia/Kolkata', p.TIMESTAMP))
BETWEEN fa.first_activity_date AND DATEADD('day', 7, fa.first_activity_date)
WHERE p.TIMESTAMP >= {{START_DATE}}
AND p.domain = {{DOMAIN}}
GROUP BY fa.user_id, fa.first_activity_date
)

SELECT
DATE_TRUNC('WEEK', fa.first_activity_date) AS cohort_week,
COUNT(DISTINCT fa.user_id) AS users_with_activity,
COUNT(DISTINCT pv.user_id) AS users_with_pageview_7d,
ROUND(100.0 * COUNT(DISTINCT pv.user_id) / NULLIF(COUNT(DISTINCT fa.user_id), 0), 1) AS retention_rate_pct
FROM first_activity fa
LEFT JOIN pageview_within_7d pv ON pv.user_id = fa.user_id
WHERE fa.first_activity_date <= DATEADD('day', -7, CURRENT_DATE()) -- Only complete 7-day windows
GROUP BY cohort_week
ORDER BY cohort_week DESC;
Sample output
COHORT_WEEKUSERS_WITH_ACTIVITYUSERS_WITH_PAGEVIEW_7DRETENTION_RATE_PCT
2025-03-10897685.4
2025-03-03948186.2
2025-02-24816782.7