Metadata completeness tracking
Organizations rely on a metadata catalog like Atlan to make data discoverable, trustworthy, and reusable across teams. As metadata is harvested from different sources, enrichment such as descriptions, owners, tags, certifications, and custom metadata can be inconsistent or incomplete. Use Lakehouse to track enrichment coverage so you can identify governance gaps, prioritize cleanup efforts, and continuously improve the quality and usability of your data assets.
Before you begin
Before you run these queries, make sure:
- Your Lakehouse setup is complete and Lakehouse is enabled for your organization. If setup isn't complete, see Get started with Lakehouse.
- The Gold layer is set up for your SQL engine. See Set up gold layer in Snowflake. For Databricks (Databricks setup guide) or BigQuery (BigQuery setup guide) setups, contact your Atlan Customer Success team.
Metadata completeness
This query measures metadata completeness across asset types by analyzing how many assets have descriptions, tags, certifications, owners, and Custom Metadata (for example, AI Readiness). It produces a governance scorecard showing enrichment gaps.
- Snowflake
- Databricks
- BigQuery
-- ============================================
-- ASSET COMPLETENESS & ENRICHMENT TRACKING
-- ============================================
-- Purpose:
-- Measure metadata completeness and governance maturity
-- across asset types by calculating:
-- • Description coverage (documented vs. undocumented)
-- • Tag coverage (tagged vs. untagged)
-- • Certification coverage (verified vs. uncertified)
-- • Ownership coverage (owned vs. orphaned)
-- • Custom Metadata enrichment (enriched vs. not enriched)
--
-- Output: Governance scorecard for catalog quality assessment
-- ============================================
WITH cm_stats AS (
-- --------------------------------------------
-- STEP 1: Calculate Custom Metadata enrichment
-- --------------------------------------------
-- For each asset, count how many Custom Metadata
-- properties are populated. This measures specialized
-- enrichment like AI Readiness, Data Quality, etc.
--
-- 🔧 CUSTOMIZE: Update the CM name filter in FLATTEN
-- to match your organization's Custom Metadata sets
-- --------------------------------------------
SELECT
alt.GUID AS asset_guid,
SUM(
-- Only count CM properties for specified CM sets
CASE
WHEN (
ARRAY_CONTAINS('ALL'::VARIANT, ARRAY_CONSTRUCT('XX')) -- Use 'ALL' to include all CM sets
OR cm.CUSTOM_METADATA_NAME IN (
-- 🔧 CUSTOMIZE: List your Custom Metadata set names here
SELECT value::STRING
FROM TABLE(FLATTEN(input => ARRAY_CONSTRUCT('AI Readiness')))
)
)
THEN
-- Check if the CM attribute has a non-null, non-empty value
CASE
WHEN cm.ATTRIBUTE_VALUE IS NULL THEN 0
-- Handle array-type CM attributes
WHEN IS_ARRAY(TRY_PARSE_JSON(cm.ATTRIBUTE_VALUE))
THEN
CASE
WHEN ARRAY_SIZE(TRY_PARSE_JSON(cm.ATTRIBUTE_VALUE)) > 0
THEN 1
ELSE 0
END
-- Handle non-array CM attributes with values
WHEN TYPEOF(TRY_PARSE_JSON(cm.ATTRIBUTE_VALUE)) <> 'ARRAY'
AND cm.ATTRIBUTE_VALUE IS NOT NULL
THEN 1
ELSE 0
END
ELSE 0
END
) AS linked_cm_prop_count
FROM ASSETS alt
LEFT JOIN CUSTOM_METADATA cm
ON alt.GUID = cm.ASSET_GUID
WHERE 1=1
GROUP BY alt.GUID
),
entity_stats AS (
-- --------------------------------------------
-- STEP 2: Calculate enrichment metrics per asset type
-- --------------------------------------------
-- For each asset type, count assets with/without:
-- • Descriptions
-- • Tags
-- • Certifications
-- • Owners
-- • Custom Metadata
-- --------------------------------------------
SELECT
ASSET_TYPE,
COUNT(*) AS total_count,
-- ========================================
-- DESCRIPTION COVERAGE
-- ========================================
COUNT(
CASE
WHEN (DESCRIPTION IS NOT NULL AND DESCRIPTION <> '')
THEN 1
END
) AS with_description,
COUNT(
CASE
WHEN (DESCRIPTION IS NULL OR DESCRIPTION = '')
THEN 1
END
) AS without_description,
-- ========================================
-- CERTIFICATION COVERAGE
-- ========================================
COUNT(
CASE
WHEN LOWER(CERTIFICATE_STATUS) = 'verified'
THEN 1
END
) AS certified,
COUNT(
CASE
WHEN LOWER(CERTIFICATE_STATUS) != 'verified'
OR CERTIFICATE_STATUS IS NULL
THEN 1
END
) AS uncertified,
-- ========================================
-- TAG COVERAGE
-- ========================================
COUNT(
CASE
WHEN (TAGS IS NOT NULL AND ARRAY_SIZE(TAGS) > 0)
THEN 1
END
) AS with_tags,
COUNT(
CASE
WHEN (TAGS IS NULL OR ARRAY_SIZE(TAGS) = 0)
THEN 1
END
) AS without_tags,
-- ========================================
-- OWNERSHIP COVERAGE
-- ========================================
-- Note: ASSETS schema only has OWNER_USERS field
-- There is no OWNER_GROUPS field in the schema
-- ========================================
COUNT(
CASE
WHEN (OWNER_USERS IS NOT NULL AND ARRAY_SIZE(OWNER_USERS) > 0)
THEN 1
END
) AS with_owners,
COUNT(
CASE
WHEN (OWNER_USERS IS NULL OR ARRAY_SIZE(OWNER_USERS) = 0)
THEN 1
END
) AS without_owners,
-- ========================================
-- CUSTOM METADATA ENRICHMENT
-- ========================================
COUNT(
CASE
WHEN (linked_cm_prop_count > 0)
THEN 1
END
) AS with_linked_cm_props,
COUNT(
CASE
WHEN (linked_cm_prop_count = 0 OR linked_cm_prop_count IS NULL)
THEN 1
END
) AS without_linked_cm_props
FROM ASSETS alt
LEFT JOIN cm_stats cm
ON alt.GUID = cm.asset_guid
WHERE
-- ========================================
-- ASSET TYPE FILTER (FIXED)
-- ========================================
-- Focus on key asset types for governance reporting
-- 🔧 CUSTOMIZE: Add/remove asset types as needed
-- ========================================
ASSET_TYPE IN (
'Table',
'Schema',
'TableauDashboard',
'TableauWorkbook',
'DataDomain',
'DataProduct',
'AtlasGlossaryTerm',
'AtlasGlossaryCategory',
'AtlasGlossary'
)
GROUP BY ASSET_TYPE
)
-- ============================================
-- STEP 3: Generate enrichment scorecard
-- ============================================
-- Output enrichment metrics with coverage percentages
-- for executive reporting and governance dashboards
-- ============================================
SELECT
ASSET_TYPE,
total_count,
-- Description metrics
with_description,
without_description,
ROUND((with_description * 100.0) / NULLIF(total_count, 0), 2) AS description_coverage_pct,
-- Tag metrics
with_tags,
without_tags,
ROUND((with_tags * 100.0) / NULLIF(total_count, 0), 2) AS tag_coverage_pct,
-- Certification metrics
certified,
uncertified,
ROUND((certified * 100.0) / NULLIF(total_count, 0), 2) AS certification_coverage_pct,
-- Ownership metrics
with_owners,
without_owners,
ROUND((with_owners * 100.0) / NULLIF(total_count, 0), 2) AS ownership_coverage_pct,
-- Custom Metadata metrics
with_linked_cm_props,
without_linked_cm_props,
ROUND((with_linked_cm_props * 100.0) / NULLIF(total_count, 0), 2) AS custom_metadata_coverage_pct
FROM entity_stats
ORDER BY ASSET_TYPE;
-- ============================================
-- ASSET COMPLETENESS & ENRICHMENT TRACKING
-- ============================================
WITH cm_stats AS (
SELECT
alt.GUID AS asset_guid,
SUM(
CASE
WHEN (
array_contains(array('XX'), 'ALL') -- Use 'ALL' to include all CM sets
OR cm.CUSTOM_METADATA_NAME IN (
-- 🔧 CUSTOMIZE: List your Custom Metadata set names
SELECT value
FROM (SELECT explode(array('AI Readiness')) AS value)
)
)
THEN
CASE
WHEN cm.ATTRIBUTE_VALUE IS NULL THEN 0
-- Check if value is a JSON array by looking for '[' at start
-- and checking if it can be parsed
WHEN TRIM(cm.ATTRIBUTE_VALUE) LIKE '[%]'
AND get_json_object(cm.ATTRIBUTE_VALUE, '$[0]') IS NOT NULL
THEN 1
-- Non-array values that are not null
WHEN cm.ATTRIBUTE_VALUE IS NOT NULL
AND TRIM(cm.ATTRIBUTE_VALUE) <> ''
AND TRIM(cm.ATTRIBUTE_VALUE) NOT LIKE '[%]'
THEN 1
ELSE 0
END
ELSE 0
END
) AS linked_cm_prop_count
FROM ASSETS alt
LEFT JOIN CUSTOM_METADATA cm
ON alt.GUID = cm.ASSET_GUID
WHERE TRUE
GROUP BY alt.GUID
),
entity_stats AS (
SELECT
ASSET_TYPE,
COUNT(*) AS total_count,
-- Description coverage
COUNT(CASE WHEN DESCRIPTION IS NOT NULL AND DESCRIPTION <> '' THEN 1 END) AS with_description,
COUNT(CASE WHEN DESCRIPTION IS NULL OR DESCRIPTION = '' THEN 1 END) AS without_description,
-- Certification coverage
COUNT(CASE WHEN LOWER(CERTIFICATE_STATUS) = 'verified' THEN 1 END) AS certified,
COUNT(CASE WHEN LOWER(CERTIFICATE_STATUS) != 'verified' OR CERTIFICATE_STATUS IS NULL THEN 1 END) AS uncertified,
-- Tag coverage
COUNT(CASE WHEN TAGS IS NOT NULL AND size(TAGS) > 0 THEN 1 END) AS with_tags,
COUNT(CASE WHEN TAGS IS NULL OR size(TAGS) = 0 THEN 1 END) AS without_tags,
-- Ownership coverage (only OWNER_USERS exists)
COUNT(CASE WHEN OWNER_USERS IS NOT NULL AND size(OWNER_USERS) > 0 THEN 1 END) AS with_owners,
COUNT(CASE WHEN OWNER_USERS IS NULL OR size(OWNER_USERS) = 0 THEN 1 END) AS without_owners,
-- Custom Metadata coverage
COUNT(CASE WHEN linked_cm_prop_count > 0 THEN 1 END) AS with_linked_cm_props,
COUNT(CASE WHEN linked_cm_prop_count = 0 OR linked_cm_prop_count IS NULL THEN 1 END) AS without_linked_cm_props
FROM ASSETS alt
LEFT JOIN cm_stats cm
ON alt.GUID = cm.asset_guid
WHERE
ASSET_TYPE IN (
'Table',
'Schema',
'TableauDashboard',
'TableauWorkbook',
'DataDomain',
'DataProduct',
'AtlasGlossaryTerm',
'AtlasGlossaryCategory',
'AtlasGlossary'
)
GROUP BY ASSET_TYPE
)
SELECT
ASSET_TYPE,
total_count,
-- Description metrics
with_description,
without_description,
ROUND((with_description * 100.0) / NULLIF(total_count, 0), 2) AS description_coverage_pct,
-- Tag metrics
with_tags,
without_tags,
ROUND((with_tags * 100.0) / NULLIF(total_count, 0), 2) AS tag_coverage_pct,
-- Certification metrics
certified,
uncertified,
ROUND((certified * 100.0) / NULLIF(total_count, 0), 2) AS certification_coverage_pct,
-- Ownership metrics
with_owners,
without_owners,
ROUND((with_owners * 100.0) / NULLIF(total_count, 0), 2) AS ownership_coverage_pct,
-- Custom Metadata metrics
with_linked_cm_props,
without_linked_cm_props,
ROUND((with_linked_cm_props * 100.0) / NULLIF(total_count, 0), 2) AS custom_metadata_coverage_pct
FROM entity_stats
ORDER BY ASSET_TYPE;
-- ============================================
-- ASSET COMPLETENESS & ENRICHMENT TRACKING
-- ============================================
WITH cm_stats AS (
SELECT
alt.GUID AS asset_guid,
SUM(
CASE
WHEN (
'ALL' IN UNNEST(['XX']) -- Use 'ALL' to include all CM sets
OR cm.CUSTOM_METADATA_NAME IN (
-- 🔧 CUSTOMIZE: List your Custom Metadata set names
SELECT value
FROM UNNEST(['AI Readiness']) AS value
)
)
THEN
CASE
WHEN cm.ATTRIBUTE_VALUE IS NULL THEN 0
-- Handle array-type CM attributes
WHEN SAFE.PARSE_JSON(cm.ATTRIBUTE_VALUE) IS NOT NULL
AND JSON_TYPE(SAFE.PARSE_JSON(cm.ATTRIBUTE_VALUE)) = 'array'
THEN
CASE
WHEN ARRAY_LENGTH(JSON_EXTRACT_ARRAY(cm.ATTRIBUTE_VALUE)) > 0
THEN 1
ELSE 0
END
-- Handle non-array CM attributes with values
WHEN cm.ATTRIBUTE_VALUE IS NOT NULL
THEN 1
ELSE 0
END
ELSE 0
END
) AS linked_cm_prop_count
FROM ASSETS alt
LEFT JOIN CUSTOM_METADATA cm
ON alt.GUID = cm.ASSET_GUID
WHERE TRUE
GROUP BY alt.GUID
),
entity_stats AS (
SELECT
ASSET_TYPE,
COUNT(*) AS total_count,
-- Description coverage
COUNTIF(DESCRIPTION IS NOT NULL AND DESCRIPTION <> '') AS with_description,
COUNTIF(DESCRIPTION IS NULL OR DESCRIPTION = '') AS without_description,
-- Certification coverage
COUNTIF(LOWER(CERTIFICATE_STATUS) = 'verified') AS certified,
COUNTIF(LOWER(CERTIFICATE_STATUS) != 'verified' OR CERTIFICATE_STATUS IS NULL) AS uncertified,
-- Tag coverage
COUNTIF(TAGS IS NOT NULL AND ARRAY_LENGTH(TAGS) > 0) AS with_tags,
COUNTIF(TAGS IS NULL OR ARRAY_LENGTH(TAGS) = 0) AS without_tags,
-- Ownership coverage (only OWNER_USERS exists)
COUNTIF(OWNER_USERS IS NOT NULL AND ARRAY_LENGTH(OWNER_USERS) > 0) AS with_owners,
COUNTIF(OWNER_USERS IS NULL OR ARRAY_LENGTH(OWNER_USERS) = 0) AS without_owners,
-- Custom Metadata coverage
COUNTIF(linked_cm_prop_count > 0) AS with_linked_cm_props,
COUNTIF(linked_cm_prop_count = 0 OR linked_cm_prop_count IS NULL) AS without_linked_cm_props
FROM ASSETS alt
LEFT JOIN cm_stats cm
ON alt.GUID = cm.asset_guid
WHERE
ASSET_TYPE IN (
'Table',
'Schema',
'TableauDashboard',
'TableauWorkbook',
'DataDomain',
'DataProduct',
'AtlasGlossaryTerm',
'AtlasGlossaryCategory',
'AtlasGlossary'
)
GROUP BY ASSET_TYPE
)
SELECT
ASSET_TYPE,
total_count,
-- Description metrics
with_description,
without_description,
ROUND((with_description * 100.0) / NULLIF(total_count, 0), 2) AS description_coverage_pct,
-- Tag metrics
with_tags,
without_tags,
ROUND((with_tags * 100.0) / NULLIF(total_count, 0), 2) AS tag_coverage_pct,
-- Certification metrics
certified,
uncertified,
ROUND((certified * 100.0) / NULLIF(total_count, 0), 2) AS certification_coverage_pct,
-- Ownership metrics
with_owners,
without_owners,
ROUND((with_owners * 100.0) / NULLIF(total_count, 0), 2) AS ownership_coverage_pct,
-- Custom Metadata metrics
with_linked_cm_props,
without_linked_cm_props,
ROUND((with_linked_cm_props * 100.0) / NULLIF(total_count, 0), 2) AS custom_metadata_coverage_pct
FROM entity_stats
ORDER BY ASSET_TYPE;
Sample output
| ASSET_TYPE | TOTAL_COUNT | WITH_DESCRIPTION | WITHOUT_DESCRIPTION | DESCRIPTION_COVERAGE_PCT | WITH_TAGS | WITHOUT_TAGS | TAG_COVERAGE_PCT | CERTIFIED | UNCERTIFIED | CERTIFICATION_COVERAGE_PCT | WITH_OWNERS | WITHOUT_OWNERS | OWNERSHIP_COVERAGE_PCT | WITH_LINKED_CM_PROPS | WITHOUT_LINKED_CM_PROPS | CUSTOM_METADATA_COVERAGE_PCT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AtlasGlossary | 96 | 71 | 25 | 73.96 | 0 | 96 | 0.00 | 47 | 49 | 48.96 | 27 | 69 | 28.13 | 0 | 96 | 0.00 |
| AtlasGlossaryCategory | 633 | 463 | 170 | 73.14 | 16 | 617 | 2.53 | 163 | 470 | 25.75 | 91 | 542 | 14.38 | 0 | 633 | 0.00 |
| AtlasGlossaryTerm | 4526 | 4407 | 119 | 97.37 | 76 | 4450 | 1.68 | 495 | 4031 | 10.94 | 270 | 4256 | 5.97 | 2 | 4524 | 0.04 |
| DataDomain | 115 | 56 | 59 | 48.70 | 0 | 115 | 0.00 | 0 | 115 | 0.00 | 114 | 1 | 99.13 | 0 | 115 | 0.00 |
| DataProduct | 136 | 101 | 35 | 74.26 | 49 | 87 | 36.03 | 45 | 91 | 33.09 | 126 | 10 | 92.65 | 0 | 136 | 0.00 |
| Schema | 587 | 38 | 549 | 6.47 | 5 | 582 | 0.85 | 5 | 582 | 0.85 | 31 | 556 | 5.28 | 0 | 587 | 0.00 |
| Table | 24771 | 725 | 24046 | 2.93 | 17620 | 7151 | 71.13 | 17609 | 7162 | 71.09 | 703 | 24068 | 2.84 | 9 | 24762 | 0.04 |
| TableauDashboard | 23 | 23 | 0 | 100.00 | 23 | 0 | 100.00 | 17 | 6 | 73.91 | 19 | 4 | 82.61 | 0 | 23 | 0.00 |
| TableauWorkbook | 17 | 13 | 4 | 76.47 | 17 | 0 | 100.00 | 11 | 6 | 64.71 | 15 | 2 | 88.24 | 0 | 17 | 0.00 |
Metadata completeness by data domain
This query calculates metadata enrichment statistics for all assets, grouped by their assigned Data Domains. It provides a domain-level view of metadata completeness across four key dimensions:
- Tags: Classification metadata coverage within each domain
- Glossary Terms: Business terminology assignments by domain
- Readme: Documentation coverage per domain
- Descriptions: Asset description completeness by domain
The query includes a special category for assets that aren't assigned to any domain, allowing you to identify and track unassigned assets that may need domain assignment as part of your data mesh governance strategy.
Update placeholders such as <db_name> and custom metadata names to match your environment. This query tracks enrichment coverage across asset types and is flexible. Adjust the asset type filters or add filters such as data source, domain, or platform as needed.
- Snowflake
- Databricks
- BigQuery
-- Calculate metadata enrichment with multiple filters applied
WITH domains AS (
-- Get all Data Domains with their names from ASSETS table
-- FILTER 1: Optionally filter to specific domains
SELECT
a.GUID AS domain_guid,
a.ASSET_NAME AS domain_name
FROM ASSETS a
WHERE a.ASSET_TYPE = 'DataDomain'
AND a.STATUS = 'ACTIVE'
-- Uncomment to filter by specific domains:
-- AND a.ASSET_NAME IN ('Sales Domain', 'Marketing Domain', 'Engineering Domain')
),
assets_with_enrichment AS (
SELECT
COALESCE(d.domain_guid, 'UNASSIGNED') AS domain_guid,
COALESCE(d.domain_name, 'No Domain Assigned') AS domain_name,
a.GUID AS asset_guid,
a.ASSET_NAME,
a.ASSET_TYPE,
CASE
WHEN a.TAGS IS NOT NULL AND ARRAY_SIZE(a.TAGS) > 0
THEN 1
ELSE 0
END AS has_tags,
CASE
WHEN a.TERM_GUIDS IS NOT NULL AND ARRAY_SIZE(a.TERM_GUIDS) > 0
THEN 1
ELSE 0
END AS has_terms,
CASE
WHEN a.README_GUID IS NOT NULL
THEN 1
ELSE 0
END AS has_readme,
CASE
WHEN a.DESCRIPTION IS NOT NULL AND LENGTH(TRIM(a.DESCRIPTION)) > 0
THEN 1
ELSE 0
END AS has_description
FROM ASSETS a
LEFT JOIN DATA_MESH_DETAILS dmd
ON a.GUID = dmd.GUID
LEFT JOIN domains d
ON dmd.DATA_DOMAIN = d.domain_guid
WHERE a.STATUS = 'ACTIVE'
-- FILTER 2: Optionally filter by specific asset types
-- Uncomment to include only specific asset types:
-- AND a.ASSET_TYPE IN ('Table', 'View', 'Column')
-- FILTER 3: Optionally filter by connector type
-- Uncomment to filter by specific connector:
-- AND a.CONNECTOR_NAME = 'snowflake'
-- FILTER 4: Optionally filter by certification status
-- Uncomment to include only verified assets:
-- AND a.CERTIFICATE_STATUS = 'VERIFIED'
-- FILTER 5: Optionally exclude unassigned assets
-- Uncomment to exclude assets without domain assignment:
-- AND dmd.DATA_DOMAIN IS NOT NULL
-- FILTER 6: Optionally show ONLY unassigned assets
-- Uncomment to show only unassigned assets:
-- AND (dmd.GUID IS NULL OR dmd.DATA_DOMAIN IS NULL)
-- FILTER 7: Optionally filter by creation date
-- Uncomment to filter by assets created after a specific date (epoch milliseconds):
-- AND a.CREATED_AT >= 1640995200000 -- Example: January 1, 2022
-- FILTER 8: Optionally filter by specific owner
-- Uncomment to filter by assets owned by specific users:
-- AND ARRAY_CONTAINS('john.doe'::VARIANT, a.OWNER_USERS)
)
SELECT
domain_name,
domain_guid,
COUNT(*) AS total_assets,
SUM(has_tags) AS assets_with_tags,
ROUND((SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_tags,
SUM(has_terms) AS assets_with_terms,
ROUND((SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_terms,
SUM(has_readme) AS assets_with_readme,
ROUND((SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_readme,
SUM(has_description) AS assets_with_description,
ROUND((SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_description,
ROUND(
(
(SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0)
) / 4.0,
2
) AS overall_enrichment_score
FROM assets_with_enrichment
GROUP BY domain_name, domain_guid
-- FILTER 9: Optionally set minimum enrichment threshold
-- Uncomment to show only domains with enrichment >= 50%:
-- HAVING overall_enrichment_score >= 50.0
-- FILTER 10: Optionally set minimum asset count
-- Uncomment to show only domains with at least 10 assets:
-- HAVING COUNT(*) >= 10
ORDER BY
CASE WHEN domain_guid = 'UNASSIGNED' THEN 1 ELSE 0 END,
overall_enrichment_score DESC;
-- Calculate metadata enrichment with multiple filters applied
WITH domains AS (
-- Get all Data Domains with their names from ASSETS table
-- FILTER 1: Optionally filter to specific domains
SELECT
a.GUID AS domain_guid,
a.ASSET_NAME AS domain_name
FROM ASSETS a
WHERE a.ASSET_TYPE = 'DataDomain'
AND a.STATUS = 'ACTIVE'
-- Uncomment to filter by specific domains:
-- AND a.ASSET_NAME IN ('Sales Domain', 'Marketing Domain', 'Engineering Domain')
),
assets_with_enrichment AS (
SELECT
COALESCE(d.domain_guid, 'UNASSIGNED') AS domain_guid,
COALESCE(d.domain_name, 'No Domain Assigned') AS domain_name,
a.GUID AS asset_guid,
a.ASSET_NAME,
a.ASSET_TYPE,
CASE
WHEN a.TAGS IS NOT NULL AND ARRAY_SIZE(a.TAGS) > 0
THEN 1
ELSE 0
END AS has_tags,
CASE
WHEN a.TERM_GUIDS IS NOT NULL AND ARRAY_SIZE(a.TERM_GUIDS) > 0
THEN 1
ELSE 0
END AS has_terms,
CASE
WHEN a.README_GUID IS NOT NULL
THEN 1
ELSE 0
END AS has_readme,
CASE
WHEN a.DESCRIPTION IS NOT NULL AND LENGTH(TRIM(a.DESCRIPTION)) > 0
THEN 1
ELSE 0
END AS has_description
FROM ASSETS a
LEFT JOIN DATA_MESH_DETAILS dmd
ON a.GUID = dmd.GUID
LEFT JOIN domains d
ON dmd.DATA_DOMAIN = d.domain_guid
WHERE a.STATUS = 'ACTIVE'
-- FILTER 2: Optionally filter by specific asset types
-- Uncomment to include only specific asset types:
-- AND a.ASSET_TYPE IN ('Table', 'View', 'Column')
-- FILTER 3: Optionally filter by connector type
-- Uncomment to filter by specific connector:
-- AND a.CONNECTOR_NAME = 'snowflake'
-- FILTER 4: Optionally filter by certification status
-- Uncomment to include only verified assets:
-- AND a.CERTIFICATE_STATUS = 'VERIFIED'
-- FILTER 5: Optionally exclude unassigned assets
-- Uncomment to exclude assets without domain assignment:
-- AND dmd.DATA_DOMAIN IS NOT NULL
-- FILTER 6: Optionally show ONLY unassigned assets
-- Uncomment to show only unassigned assets:
-- AND (dmd.GUID IS NULL OR dmd.DATA_DOMAIN IS NULL)
-- FILTER 7: Optionally filter by creation date
-- Uncomment to filter by assets created after a specific date (epoch milliseconds):
-- AND a.CREATED_AT >= 1640995200000 -- Example: January 1, 2022
-- FILTER 8: Optionally filter by specific owner
-- Uncomment to filter by assets owned by specific users:
-- AND ARRAY_CONTAINS('john.doe'::VARIANT, a.OWNER_USERS)
)
SELECT
domain_name,
domain_guid,
COUNT(*) AS total_assets,
SUM(has_tags) AS assets_with_tags,
ROUND((SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_tags,
SUM(has_terms) AS assets_with_terms,
ROUND((SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_terms,
SUM(has_readme) AS assets_with_readme,
ROUND((SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_readme,
SUM(has_description) AS assets_with_description,
ROUND((SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_description,
ROUND(
(
(SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0)
) / 4.0,
2
) AS overall_enrichment_score
FROM assets_with_enrichment
GROUP BY domain_name, domain_guid
-- FILTER 9: Optionally set minimum enrichment threshold
-- Uncomment to show only domains with enrichment >= 50%:
-- HAVING overall_enrichment_score >= 50.0
-- FILTER 10: Optionally set minimum asset count
-- Uncomment to show only domains with at least 10 assets:
-- HAVING COUNT(*) >= 10
ORDER BY
CASE WHEN domain_guid = 'UNASSIGNED' THEN 1 ELSE 0 END,
overall_enrichment_score DESC;
SET @@dataset_id = 'ATLAN_GOLD';
-- 1. Metadata enrichment tracking
WITH domains AS (
SELECT
a.GUID AS domain_guid,
a.ASSET_NAME AS domain_name
FROM ASSETS a
WHERE a.ASSET_TYPE = 'DataDomain'
AND a.STATUS = 'ACTIVE'
),
assets_with_enrichment AS (
SELECT
COALESCE(d.domain_guid, 'UNASSIGNED') AS domain_guid,
COALESCE(d.domain_name, 'No Domain Assigned') AS domain_name,
a.GUID AS asset_guid,
a.ASSET_NAME,
a.ASSET_TYPE,
CASE
WHEN a.TAGS IS NOT NULL AND ARRAY_LENGTH(a.TAGS) > 0
THEN 1
ELSE 0
END AS has_tags,
CASE
WHEN a.TERM_GUIDS IS NOT NULL AND ARRAY_LENGTH(a.TERM_GUIDS) > 0
THEN 1
ELSE 0
END AS has_terms,
CASE
WHEN a.README_GUID IS NOT NULL
THEN 1
ELSE 0
END AS has_readme,
CASE
WHEN a.DESCRIPTION IS NOT NULL AND LENGTH(TRIM(a.DESCRIPTION)) > 0
THEN 1
ELSE 0
END AS has_description
FROM ASSETS a
LEFT JOIN DATA_MESH_DETAILS dmd
ON a.GUID = dmd.GUID
LEFT JOIN domains d
ON dmd.DATA_DOMAIN = d.domain_guid
WHERE a.STATUS = 'ACTIVE'
-- Example: Filter by multiple tags (both must exist)
-- AND 'Finance' IN UNNEST(a.TAGS)
-- AND 'Verified' IN UNNEST(a.TAGS)
-- Example: Filter by owner
-- AND 'john.doe' IN UNNEST(a.OWNER_USERS)
-- Example: Filter by date (last 90 days)
-- AND a.CREATED_AT >= UNIX_MILLIS(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 -- DAY))
)
SELECT
domain_name,
domain_guid,
COUNT(*) AS total_assets,
SUM(has_tags) AS assets_with_tags,
ROUND((SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_tags,
SUM(has_terms) AS assets_with_terms,
ROUND((SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_terms,
SUM(has_readme) AS assets_with_readme,
ROUND((SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_readme,
SUM(has_description) AS assets_with_description,
ROUND((SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_description,
ROUND(
(
(SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0)
) / 4.0,
2
) AS overall_enrichment_score
FROM assets_with_enrichment
GROUP BY domain_name, domain_guid;
Sample output
This sample output shows enrichment coverage by data domain. Use it to compare tag, term, readme, and description completeness across domains and spot gaps.
Sample output table
| DOMAIN_NAME | DOMAIN_GUID | TOTAL_ASSETS | ASSETS_WITH_TAGS | PCT_WITH_TAGS | ASSETS_WITH_TERMS | PCT_WITH_TERMS | ASSETS_WITH_README | PCT_WITH_README | ASSETS_WITH_DESCRIPTION | PCT_WITH_DESCRIPTION | OVERALL_ENRICHMENT_SCORE |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Rail | 9189e4d3-02b9-4318-b168-6e634ac535d0 | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Workforce | 3094ce14-acf0-48fa-9b25-bd72c7d6a82e | 2 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 100.00 |
| Customer 360 | e24235f6-ef1e-4884-81a4-f5f76934b8b7 | 2 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 100.00 |
| Clinical Development | 09e693cb-49e7-4b14-b142-31812a39a8ae | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Phase II Studies | e6ecf919-bfeb-477b-91ca-bdaa55130f58 | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Source Data Verification Tracking | daa341f8-45c0-492b-a8d7-1a8bc03aca72 | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Investment Management | 75f0e00c-e1e3-4d45-84f1-c361cc13a4dd | 2 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 100.00 |
| Fraud | 9d22eaaf-5890-458b-891f-e56c3f3605bb | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Wealth Management | ce5f44eb-ab1f-4a3e-aa23-0ad18083b95c | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Medicine | 1d522bd6-18d4-4a06-a7ba-71cf4e1526bf | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Phase III Studies | 86d23a7f-ea1b-458e-8796-0e9431f29742 | 2 | 2 | 100.00 | 1 | 50.00 | 2 | 100.00 | 2 | 100.00 | 87.50 |
| Order Management | c26940ea-7d5a-4cf7-af99-55494916bdca | 5 | 4 | 80.00 | 4 | 80.00 | 4 | 80.00 | 5 | 100.00 | 85.00 |
| Study Registry | af9dd2d0-5272-4e45-8ba9-0e30bb2861c2 | 1 | 1 | 100.00 | 1 | 100.00 | 0 | 0.00 | 1 | 100.00 | 75.00 |
| Risk | 37ee7bde-3f17-4b5e-b210-b5bc5226e72c | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 0 | 0.00 | 75.00 |
| Human Resources (HR) | 431c152d-bb84-4bc7-9d20-9c98ba9c0c9b | 1 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Insights, Analytics, & Operations | 451bfe73-13d0-4795-8bc6-d5ab148ae3ac | 1 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Commerical | d5de0399-2e82-4eb5-9241-ca595d5c87f0 | 1 | 1 | 100.00 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Flight Operations | 7d9b3ee7-1873-4378-b479-9a5d223a30b2 | 1 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Fin | 1266cb08-685e-444e-9513-ef4c6ab87a48 | 1 | 1 | 100.00 | 1 | 100.00 | 0 | 0.00 | 1 | 100.00 | 75.00 |
| Finance | 5431ea4b-d00b-41af-9167-18d4da7df767 | 1 | 1 | 100.00 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Marketing | ae9d175a-1963-43df-8634-6cf6d450bdfe | 1 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Operations | 8e053ae6-778a-45c2-b664-4f7ea3c1b201 | 6 | 4 | 66.67 | 4 | 66.67 | 4 | 66.67 | 5 | 83.33 | 70.83 |
| Finance | 83ed2107-d2ce-4da8-aa1a-6a23e91576cb | 8 | 5 | 62.50 | 5 | 62.50 | 5 | 62.50 | 7 | 87.50 | 68.75 |
| Forecasting | 802ab2e5-7e49-445f-abf6-7e41c6bf467a | 5 | 2 | 40.00 | 3 | 60.00 | 3 | 60.00 | 5 | 100.00 | 65.00 |
| Sales | c55dc63b-bbf6-4d89-80d7-87372c2b4e3d | 2 | 1 | 50.00 | 1 | 50.00 | 1 | 50.00 | 2 | 100.00 | 62.50 |
| Services | 7c901dcb-c0d0-4034-98e8-280a34e1f1eb | 3 | 1 | 33.33 | 1 | 33.33 | 2 | 66.67 | 3 | 100.00 | 58.33 |
| Customer | a150d59c-964f-4edf-ad67-2d800021769f | 10 | 5 | 50.00 | 3 | 30.00 | 5 | 50.00 | 10 | 100.00 | 57.50 |
| Capital Markets | 72d12e25-ac65-4d3e-aeb3-f465b6a065cb | 2 | 1 | 50.00 | 1 | 50.00 | 0 | 0.00 | 2 | 100.00 | 50.00 |
| Machine Learning | 936da2c9-61d9-4c3b-8220-33940d5ee20d | 4 | 1 | 25.00 | 1 | 25.00 | 1 | 25.00 | 2 | 50.00 | 31.25 |
| Occupancy | a47f32e4-d26c-4342-9419-a737263e299d | 1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 1 | 100.00 | 25.00 |
| Value based care | d1187d88-3220-45ad-828c-e92023d4c91f | 1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 1 | 100.00 | 25.00 |
| AirFare Calculator | e935fd96-1ada-4d7c-9a46-240a31959bb6 | 1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 |
| Cross-Functional Reporting | a105956e-5312-4e81-b518-b88487621e80 | 1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 |
| Racing | f2d36c28-19b2-4e44-a341-59e706f0244e | 2 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 |
| Food & Beverage | 526de60b-aa52-41b5-84fa-ed837f6add3b | 2 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 |
| No Domain Assigned | UNASSIGNED | 912271 | 109422 | 11.99 | 665 | 0.07 | 367 | 0.04 | 372862 | 40.87 | 13.24 |