Lineage tags coverage across systems
Measure and improve tag coverage and propagation across systems using Lakehouse. Use lineage and tag metadata to understand how sensitive or governed data flows from upstream sources to all downstream tables, views, and dashboards.
This page provides SQL templates and guidance for analyzing tag propagation and coverage gaps across your data assets.
Before you begin
Before you run these queries, make sure:
- Your Lakehouse setup is complete and Lakehouse is enabled for your organization. If setup isn't complete, see Get started with Lakehouse.
- The Gold layer is set up for your SQL engine. See Set up gold layer in Snowflake. For Databricks (Databricks setup guide) or BigQuery (BigQuery setup guide) setups, contact your Atlan Customer Success team.
You can use SQL queries directly on your lineage and asset lookup tables in the Lakehouse to analyze tag propagation performance and coverage across systems.
Analyze tag propagation across lineage
Trace how governance tags flow from tagged upstream assets to all downstream consumers. Use this to identify where tags are missing, partially propagated, or replaced, and to calculate a tag inheritance percentage at each hop.
Use this query to:
- Identify where expected governance tags such as
PII,Confidential,Finance, orCost_centerare dropped along the lineage path. - Quantify tag inheritance percentage at each hop so you can measure coverage quality.
- Highlight downstream assets that introduce new tags or have no tags at all, which are prime candidates for remediation or review.
- Snowflake
- Databricks
- BigQuery
USE ATLAN_GOLD;
-- ============================================
-- TAG PROPAGATION ANALYSIS
-- Trace how governance tags flow through downstream lineage
-- ============================================
WITH tagged_sources AS (
-- Identify tagged source assets
SELECT
GUID,
ASSET_NAME,
ASSET_TYPE,
ASSET_QUALIFIED_NAME,
CONNECTOR_NAME,
TAGS,
CERTIFICATE_STATUS,
OWNER_USERS,
TO_TIMESTAMP_LTZ(UPDATED_AT / 1000) AS LAST_UPDATED
FROM ASSETS
WHERE
STATUS = 'ACTIVE'
AND CONNECTOR_NAME = 'snowflake'
AND TAGS IS NOT NULL
AND ARRAY_SIZE(TAGS) > 0
AND (
ARRAY_CONTAINS('PII'::VARIANT, TAGS)
OR ARRAY_CONTAINS('Confidential'::VARIANT, TAGS)
OR ARRAY_CONTAINS('Finance'::VARIANT, TAGS)
OR ARRAY_CONTAINS('Cost_center'::VARIANT, TAGS)
)
),
downstream_lineage AS (
-- Expand downstream lineage
SELECT
TS.GUID AS SOURCE_GUID,
TS.ASSET_NAME AS SOURCE_NAME,
TS.ASSET_TYPE AS SOURCE_TYPE,
TS.CONNECTOR_NAME AS SOURCE_CONNECTOR,
TS.TAGS AS SOURCE_TAGS,
L.RELATED_GUID AS DOWNSTREAM_GUID,
L.RELATED_NAME AS DOWNSTREAM_NAME,
L.RELATED_TYPE AS DOWNSTREAM_TYPE,
L.LEVEL AS LINEAGE_DEPTH,
RA.TAGS AS DOWNSTREAM_TAGS,
RA.CONNECTOR_NAME AS DOWNSTREAM_CONNECTOR
FROM tagged_sources TS
INNER JOIN LINEAGE L
ON TS.GUID = L.START_GUID
LEFT JOIN ASSETS RA
ON L.RELATED_GUID = RA.GUID
WHERE
L.DIRECTION = 'DOWNSTREAM'
)
SELECT
SOURCE_NAME,
SOURCE_TYPE,
SOURCE_CONNECTOR,
SOURCE_TAGS,
LINEAGE_DEPTH,
DOWNSTREAM_NAME,
DOWNSTREAM_TYPE,
DOWNSTREAM_CONNECTOR,
DOWNSTREAM_TAGS,
CASE
WHEN DOWNSTREAM_TAGS IS NULL
OR ARRAY_SIZE(DOWNSTREAM_TAGS) = 0
THEN 'NO TAGS - Complete tag drop'
WHEN ARRAYS_OVERLAP(SOURCE_TAGS, DOWNSTREAM_TAGS)
THEN 'TAGS PROPAGATED - Some or all tags present'
ELSE 'DIFFERENT TAGS - Tagged but not inherited from source'
END AS TAG_PROPAGATION_STATUS,
ARRAY_EXCEPT(
SOURCE_TAGS,
COALESCE(DOWNSTREAM_TAGS, ARRAY_CONSTRUCT())
) AS DROPPED_TAGS,
CASE
WHEN DOWNSTREAM_TAGS IS NOT NULL
AND ARRAY_SIZE(DOWNSTREAM_TAGS) > 0
THEN ROUND(
(ARRAY_SIZE(
ARRAY_INTERSECTION(SOURCE_TAGS, DOWNSTREAM_TAGS)
) * 100.0) / ARRAY_SIZE(SOURCE_TAGS),
2
)
ELSE 0
END AS TAG_INHERITANCE_PCT
FROM downstream_lineage
ORDER BY
SOURCE_NAME,
LINEAGE_DEPTH,
DOWNSTREAM_NAME;
USE atlan_gold;
-- ============================================
-- TAG PROPAGATION ANALYSIS (Databricks SQL)
-- Purpose:
-- Trace how governance tags (PII, Confidential, Finance, Cost_center)
-- flow through downstream lineage across all connectors.
--
-- For each tagged upstream asset:
-- • List all downstream consumers
-- • Check whether expected tags are present downstream
-- • Identify where tags are dropped or replaced
-- • Quantify tag inheritance percentage
-- ============================================
WITH tagged_sources AS (
-- --------------------------------------------
-- STEP 1: Identify tagged "source" assets
-- --------------------------------------------
-- These are active assets that already carry
-- governance-relevant tags. They represent the
-- starting point of lineage analysis.
--
-- Filters:
-- • STATUS = 'ACTIVE' - Only analyze current assets
-- • CONNECTOR_NAME - Customize to your data platform(s)
-- • TAGS - Focus on governance-critical tags
-- --------------------------------------------
SELECT
GUID,
ASSET_NAME,
ASSET_TYPE,
ASSET_QUALIFIED_NAME,
CONNECTOR_NAME,
TAGS, -- Tags applied at the source
CERTIFICATE_STATUS,
OWNER_USERS,
from_unixtime(UPDATED_AT / 1000) AS LAST_UPDATED -- Convert epoch milliseconds to timestamp
FROM ASSETS
WHERE
STATUS = 'ACTIVE'
AND CONNECTOR_NAME = 'snowflake' -- 🔧 CUSTOMIZE: Replace with your connector (e.g., 'databricks', 'bigquery')
AND TAGS IS NOT NULL
AND size(TAGS) > 0 -- Databricks: use size() instead of ARRAY_SIZE()
-- Focus on specific governance tags we expect
-- to propagate through lineage
-- 🔧 CUSTOMIZE: Replace with your organization's governance tags
AND (
array_contains(TAGS, 'PII')
OR array_contains(TAGS, 'Confidential')
OR array_contains(TAGS, 'Finance')
OR array_contains(TAGS, 'Cost_center')
)
),
downstream_lineage AS (
-- --------------------------------------------
-- STEP 2: Expand downstream lineage
-- --------------------------------------------
-- For each tagged source asset, retrieve all
-- downstream assets using the LINEAGE view.
--
-- This gives us the full dependency chain and
-- allows us to evaluate tag propagation at each hop.
--
-- LINEAGE.DIRECTION = 'DOWNSTREAM' ensures we only
-- follow forward dependencies (source → consumers).
-- --------------------------------------------
SELECT
TS.GUID AS SOURCE_GUID,
TS.ASSET_NAME AS SOURCE_NAME,
TS.ASSET_TYPE AS SOURCE_TYPE,
TS.CONNECTOR_NAME AS SOURCE_CONNECTOR,
TS.TAGS AS SOURCE_TAGS,
TS.LAST_UPDATED AS SOURCE_LAST_UPDATED,
-- Downstream asset details from LINEAGE view
L.RELATED_GUID AS DOWNSTREAM_GUID,
L.RELATED_NAME AS DOWNSTREAM_NAME,
L.RELATED_TYPE AS DOWNSTREAM_TYPE,
L.LEVEL AS LINEAGE_DEPTH, -- Distance from the source (number of hops)
-- Downstream metadata from ASSETS for governance analysis
RA.TAGS AS DOWNSTREAM_TAGS,
RA.CONNECTOR_NAME AS DOWNSTREAM_CONNECTOR,
RA.ASSET_QUALIFIED_NAME AS DOWNSTREAM_QUALIFIED_NAME,
RA.OWNER_USERS AS DOWNSTREAM_OWNERS,
RA.CERTIFICATE_STATUS AS DOWNSTREAM_CERT,
from_unixtime(RA.UPDATED_AT / 1000) AS DOWNSTREAM_LAST_UPDATED,
datediff(CURRENT_DATE(), from_unixtime(RA.UPDATED_AT / 1000)) AS DOWNSTREAM_DAYS_SINCE_UPDATE
FROM tagged_sources TS
INNER JOIN LINEAGE L
ON TS.GUID = L.START_GUID
LEFT JOIN ASSETS RA
ON L.RELATED_GUID = RA.GUID
WHERE
L.DIRECTION = 'DOWNSTREAM' -- Only trace forward dependencies
)
-- --------------------------------------------
-- STEP 3: Tag propagation evaluation
-- --------------------------------------------
-- Compare source tags with downstream tags to:
-- • Detect tag inheritance (full or partial)
-- • Identify dropped tags (governance gaps)
-- • Identify newly added tags (manual overrides)
-- • Quantify propagation effectiveness as percentage
--
-- This step generates actionable insights for
-- governance teams to remediate tag coverage issues.
-- --------------------------------------------
SELECT
SOURCE_NAME,
SOURCE_TYPE,
SOURCE_CONNECTOR,
SOURCE_TAGS,
SOURCE_LAST_UPDATED,
LINEAGE_DEPTH,
DOWNSTREAM_NAME,
DOWNSTREAM_TYPE,
DOWNSTREAM_CONNECTOR,
DOWNSTREAM_QUALIFIED_NAME,
DOWNSTREAM_TAGS,
DOWNSTREAM_OWNERS,
DOWNSTREAM_CERT,
DOWNSTREAM_LAST_UPDATED,
DOWNSTREAM_DAYS_SINCE_UPDATE,
-- ----------------------------------------
-- High-level propagation status
-- ----------------------------------------
-- Provides at-a-glance assessment of tag inheritance:
-- 🔴 NO TAGS - Complete tag drop (governance gap)
-- 🟢 TAGS PROPAGATED - Some or all tags inherited
-- 🟡 DIFFERENT TAGS - Tagged but not from source
-- ----------------------------------------
CASE
-- Downstream asset has no tags at all
WHEN DOWNSTREAM_TAGS IS NULL
OR size(DOWNSTREAM_TAGS) = 0
THEN '🔴 NO TAGS - Complete tag drop'
-- At least one source tag exists downstream
WHEN arrays_overlap(SOURCE_TAGS, DOWNSTREAM_TAGS)
THEN '🟢 TAGS PROPAGATED - Some or all tags present'
-- Downstream is tagged, but not with source tags
ELSE '🟡 DIFFERENT TAGS - Tagged but not inherited from source'
END AS TAG_PROPAGATION_STATUS,
-- ----------------------------------------
-- Tags present at source but missing downstream
-- ----------------------------------------
-- Identifies exact "drop points" where governance
-- tags were lost. Use this to prioritize remediation
-- efforts on critical assets.
-- ----------------------------------------
array_except(
SOURCE_TAGS,
COALESCE(DOWNSTREAM_TAGS, array()) -- Databricks: use array() instead of ARRAY_CONSTRUCT()
) AS DROPPED_TAGS,
-- ----------------------------------------
-- Tags newly introduced downstream
-- ----------------------------------------
-- Helps spot manual or inconsistent tagging patterns
-- that may not align with upstream governance policies.
-- ----------------------------------------
array_except(
COALESCE(DOWNSTREAM_TAGS, array()),
SOURCE_TAGS
) AS NEW_TAGS_ADDED,
-- ----------------------------------------
-- Tag inheritance percentage
-- ----------------------------------------
-- Measures how many expected tags survived the
-- lineage propagation. 100% = perfect inheritance.
-- Lower percentages indicate governance gaps.
-- ----------------------------------------
CASE
WHEN DOWNSTREAM_TAGS IS NOT NULL
AND size(DOWNSTREAM_TAGS) > 0
THEN ROUND(
(size(
array_intersect(SOURCE_TAGS, DOWNSTREAM_TAGS)
) * 100.0) / size(SOURCE_TAGS),
2
)
ELSE 0
END AS TAG_INHERITANCE_PCT
FROM downstream_lineage
-- --------------------------------------------
-- Ordering:
-- • Group by source asset
-- • Walk lineage depth outward (1 hop, 2 hops, etc.)
-- • Stable ordering for audits & reviews
-- --------------------------------------------
ORDER BY
SOURCE_NAME,
LINEAGE_DEPTH,
DOWNSTREAM_NAME;
SET @@dataset_id = 'ATLAN_GOLD';
-- ============================================
-- TAG PROPAGATION ANALYSIS
-- Trace how governance tags flow through downstream lineage
-- ============================================
WITH tagged_sources AS (
SELECT
GUID,
ASSET_NAME,
ASSET_TYPE,
ASSET_QUALIFIED_NAME,
CONNECTOR_NAME,
TAGS,
CERTIFICATE_STATUS,
OWNER_USERS
FROM `ASSETS`
WHERE
STATUS = 'ACTIVE'
AND CONNECTOR_NAME = 'snowflake'
AND TAGS IS NOT NULL
AND ARRAY_LENGTH(TAGS) > 0
AND (
'PII' IN UNNEST(TAGS)
OR 'Confidential' IN UNNEST(TAGS)
OR 'Finance' IN UNNEST(TAGS)
OR 'Cost_center' IN UNNEST(TAGS)
)
),
downstream_lineage AS (
SELECT
TS.GUID AS SOURCE_GUID,
TS.ASSET_NAME AS SOURCE_NAME,
TS.ASSET_TYPE AS SOURCE_TYPE,
TS.CONNECTOR_NAME AS SOURCE_CONNECTOR,
TS.TAGS AS SOURCE_TAGS,
L.RELATED_GUID AS DOWNSTREAM_GUID,
L.RELATED_NAME AS DOWNSTREAM_NAME,
L.RELATED_TYPE AS DOWNSTREAM_TYPE,
L.LEVEL AS LINEAGE_DEPTH,
RA.TAGS AS DOWNSTREAM_TAGS,
RA.CONNECTOR_NAME AS DOWNSTREAM_CONNECTOR
FROM tagged_sources TS
INNER JOIN `LINEAGE` L
ON TS.GUID = L.START_GUID
LEFT JOIN `ASSETS` RA
ON L.RELATED_GUID = RA.GUID
WHERE
L.DIRECTION = 'DOWNSTREAM'
)
SELECT
SOURCE_NAME,
SOURCE_TYPE,
SOURCE_CONNECTOR,
SOURCE_TAGS,
LINEAGE_DEPTH,
DOWNSTREAM_NAME,
DOWNSTREAM_TYPE,
DOWNSTREAM_CONNECTOR,
DOWNSTREAM_TAGS,
CASE
WHEN DOWNSTREAM_TAGS IS NULL
OR ARRAY_LENGTH(DOWNSTREAM_TAGS) = 0
THEN 'NO TAGS - Complete tag drop'
WHEN EXISTS (
SELECT 1 FROM UNNEST(SOURCE_TAGS) AS st
WHERE st IN UNNEST(DOWNSTREAM_TAGS)
)
THEN 'TAGS PROPAGATED - Some or all tags present'
ELSE 'DIFFERENT TAGS - Tagged but not inherited from source'
END AS TAG_PROPAGATION_STATUS,
ARRAY(
SELECT st FROM UNNEST(SOURCE_TAGS) AS st
WHERE st NOT IN (SELECT dt FROM UNNEST(IFNULL(DOWNSTREAM_TAGS, [])) AS dt)
) AS DROPPED_TAGS,
CASE
WHEN DOWNSTREAM_TAGS IS NOT NULL
AND ARRAY_LENGTH(DOWNSTREAM_TAGS) > 0
THEN ROUND(
(ARRAY_LENGTH(ARRAY(
SELECT st FROM UNNEST(SOURCE_TAGS) AS st
WHERE st IN (SELECT dt FROM UNNEST(DOWNSTREAM_TAGS) AS dt)
)) * 100.0) / ARRAY_LENGTH(SOURCE_TAGS),
2
)
ELSE 0
END AS TAG_INHERITANCE_PCT
FROM downstream_lineage
ORDER BY
SOURCE_NAME,
LINEAGE_DEPTH,
DOWNSTREAM_NAME;
Sample output
| SOURCE_NAME | SOURCE_TYPE | SOURCE_TAGS | LINEAGE_DEPTH | DOWNSTREAM_NAME | DOWNSTREAM_TYPE | DOWNSTREAM_CONNECTOR | TAG_PROPAGATION_STATUS | TAG_INHERITANCE_PCT |
|---|---|---|---|---|---|---|---|---|
| A | Table | Attribute, Cost_center, Confidential | 1 | fraud_detection_model_v1 | DatabricksAIModelVersion | databricks | TAGS PROPAGATED - Some or all tags present | 100.00 |
| ABC_LOT_ID | Column | Attribute, Customer Service, Cost_center | 1 | ABC_LOT_ID | PowerBIColumn | powerbi | TAGS PROPAGATED - Some or all tags present | 100.00 |
| ABOUT_ME | Column | Customer Service, Attribute, Confidential | 1 | ABOUT_ME | Column | snowflake | TAGS PROPAGATED - Some or all tags present | 100.00 |
| ACCOUNT | Table | PII, Finance, Confidential, Customer Service | 1 | ACCOUNT (SALESFORCE.ACCOUNT) | TableauDatasource | tableau | TAGS PROPAGATED - Some or all tags present | 77.78 |
| ACCOUNT | Table | PII, Finance, Confidential, Customer Service | 1 | STG_SALESFORCE__ACCOUNT_TMP | View | snowflake | TAGS PROPAGATED - Some or all tags present | 77.78 |
| ACCOUNT | Table | PII, Finance, Confidential, Customer Service | 2 | STG_SALESFORCE__ACCOUNT | Table | snowflake | TAGS PROPAGATED - Some or all tags present | 88.89 |
| ACCOUNT | Table | PII, Finance, Confidential, Customer Service | 2 | Sales Operation Dashboard | TableauDashboard | tableau | TAGS PROPAGATED - Some or all tags present | 77.78 |
Common filters and customization examples
Use these filter examples to customize the tag propagation query for your specific needs. Add these filters to the tagged_sources CTE WHERE clause or the final SELECT statement as indicated.
- Snowflake
- Databricks
- BigQuery
-- ============================================
-- COMMON FILTERS FOR SNOWFLAKE
-- Add these to customize your query
-- ============================================
-- Filter by specific connectors
-- Add to tagged_sources WHERE clause:
AND CONNECTOR_NAME IN ('snowflake', 'bigquery', 'databricks')
-- Filter by specific governance tags
-- Replace the default tag filter with your organization's tags:
AND (
ARRAY_CONTAINS('Sensitive'::VARIANT, TAGS)
OR ARRAY_CONTAINS('GDPR'::VARIANT, TAGS)
OR ARRAY_CONTAINS('HIPAA'::VARIANT, TAGS)
)
-- Filter by certification status
-- Add to tagged_sources WHERE clause:
AND CERTIFICATE_STATUS = 'VERIFIED'
-- Filter by specific asset types
-- Add to tagged_sources WHERE clause:
AND ASSET_TYPE IN ('Table', 'View', 'MaterializedView')
-- Filter by time window (last 90 days)
-- Add to tagged_sources WHERE clause:
AND TO_TIMESTAMP_LTZ(UPDATED_AT / 1000) >= DATEADD(day, -90, CURRENT_DATE())
-- Filter by lineage depth
-- Add to final SELECT WHERE clause:
WHERE LINEAGE_DEPTH <= 3 -- Only analyze up to 3 hops downstream
-- ============================================
-- COMMON FILTERS FOR DATABRICKS
-- Add these to customize your query
-- ============================================
-- Filter by specific connectors
-- Add to tagged_sources WHERE clause:
AND CONNECTOR_NAME IN ('snowflake', 'bigquery', 'databricks')
-- Filter by specific governance tags
-- Replace the default tag filter with your organization's tags:
AND (
ARRAY_CONTAINS(TAGS, 'Sensitive')
OR ARRAY_CONTAINS(TAGS, 'GDPR')
OR ARRAY_CONTAINS(TAGS, 'HIPAA')
)
-- Filter by certification status
-- Add to tagged_sources WHERE clause:
AND CERTIFICATE_STATUS = 'VERIFIED'
-- Filter by specific asset types
-- Add to tagged_sources WHERE clause:
AND ASSET_TYPE IN ('Table', 'View', 'MaterializedView')
-- Filter by time window (last 90 days)
-- Add to tagged_sources WHERE clause:
AND FROM_UNIXTIME(UPDATED_AT / 1000) >= DATE_SUB(CURRENT_DATE(), 90)
-- Filter by lineage depth
-- Add to final SELECT WHERE clause:
WHERE LINEAGE_DEPTH <= 3 -- Only analyze up to 3 hops downstream
-- ============================================
-- COMMON FILTERS FOR BIGQUERY
-- Add these to customize your query
-- ============================================
-- Filter by specific connectors
-- Add to tagged_sources WHERE clause:
AND CONNECTOR_NAME IN ('snowflake', 'bigquery', 'databricks')
-- Filter by specific governance tags
-- Replace the default tag filter with your organization's tags:
AND (
'Sensitive' IN UNNEST(TAGS)
OR 'GDPR' IN UNNEST(TAGS)
OR 'HIPAA' IN UNNEST(TAGS)
)
-- Filter by certification status
-- Add to tagged_sources WHERE clause:
AND CERTIFICATE_STATUS = 'VERIFIED'
-- Filter by specific asset types
-- Add to tagged_sources WHERE clause:
AND ASSET_TYPE IN ('Table', 'View', 'MaterializedView')
-- Filter by time window (last 90 days)
-- Add to tagged_sources WHERE clause:
AND TIMESTAMP_MILLIS(UPDATED_AT) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)
-- Filter by lineage depth
-- Add to final SELECT WHERE clause:
WHERE LINEAGE_DEPTH <= 3 -- Only analyze up to 3 hops downstream