Metadata completeness tracking
Organizations rely on a metadata catalog like Atlan to make data discoverable, trustworthy, and reusable across teams. As metadata is harvested from different sources, enrichment such as descriptions, owners, tags, certifications, and custom metadata can be inconsistent or incomplete. Use Lakehouse to track enrichment coverage so you can identify governance gaps, prioritize cleanup efforts, and continuously improve the quality and usability of your data assets.
Before you begin
Before you run these queries, make sure:
- Your Lakehouse setup is complete and Lakehouse is enabled for your organization. If setup isn't complete, see Get started with Lakehouse.
- The Gold namespace is available natively in the Lakehouse catalog -- no additional setup is required.
Metadata completeness
This query measures metadata completeness across asset types by analyzing how many assets have descriptions, tags, certifications, owners, and Custom Metadata (for example, AI Readiness). It produces a governance scorecard showing enrichment gaps.
- Snowflake
- Databricks
- BigQuery
-- ============================================
-- ASSET COMPLETENESS & ENRICHMENT TRACKING
-- ============================================
-- Purpose:
-- Measure metadata completeness and governance maturity
-- across asset types by calculating:
-- • Description coverage (documented vs. undocumented)
-- • Tag coverage (tagged vs. untagged)
-- • Certification coverage (verified vs. uncertified)
-- • Ownership coverage (owned vs. orphaned)
-- • Custom Metadata enrichment (enriched vs. not enriched)
--
-- Output: Governance scorecard for catalog quality assessment
-- ============================================
WITH tag_stats AS (
-- --------------------------------------------
-- STEP 1a: Calculate tag coverage per asset
-- --------------------------------------------
-- Tags are queried from entity_metadata.TagRelationship
-- since the TAGS column is no longer on the ASSETS table
-- --------------------------------------------
SELECT guid, COUNT(*) AS tag_count
FROM entity_metadata.TagRelationship
WHERE status = 'ACTIVE'
GROUP BY guid
),
cm_stats AS (
-- --------------------------------------------
-- STEP 1b: Calculate Custom Metadata enrichment
-- --------------------------------------------
-- For each asset, count how many Custom Metadata
-- properties are populated. This measures specialized
-- enrichment like AI Readiness, Data Quality, etc.
--
-- Custom Metadata is queried from entity_metadata.CustomMetadata
--
-- 🔧 CUSTOMIZE: Update the CM name filter in FLATTEN
-- to match your organization's Custom Metadata sets
-- --------------------------------------------
SELECT
alt.guid AS asset_guid,
SUM(
-- Only count CM properties for specified CM sets
CASE
WHEN (
ARRAY_CONTAINS('ALL'::VARIANT, ARRAY_CONSTRUCT('XX')) -- Use 'ALL' to include all CM sets
OR cm.customMetadataSetName IN (
-- 🔧 CUSTOMIZE: List your Custom Metadata set names here
SELECT value::STRING
FROM TABLE(FLATTEN(input => ARRAY_CONSTRUCT('AI Readiness')))
)
)
THEN
-- Check if the CM attribute has a non-null, non-empty value
CASE
WHEN cm.attributeValue IS NULL THEN 0
-- Handle array-type CM attributes
WHEN IS_ARRAY(TRY_PARSE_JSON(cm.attributeValue))
THEN
CASE
WHEN ARRAY_SIZE(TRY_PARSE_JSON(cm.attributeValue)) > 0
THEN 1
ELSE 0
END
-- Handle non-array CM attributes with values
WHEN TYPEOF(TRY_PARSE_JSON(cm.attributeValue)) <> 'ARRAY'
AND cm.attributeValue IS NOT NULL
THEN 1
ELSE 0
END
ELSE 0
END
) AS linked_cm_prop_count
FROM gold.assets alt
LEFT JOIN entity_metadata.CustomMetadata cm
ON alt.guid = cm.GUID
WHERE 1=1
GROUP BY alt.guid
),
entity_stats AS (
-- --------------------------------------------
-- STEP 2: Calculate enrichment metrics per asset type
-- --------------------------------------------
-- For each asset type, count assets with/without:
-- • Descriptions
-- • Tags
-- • Certifications
-- • Owners
-- • Custom Metadata
-- --------------------------------------------
SELECT
asset_type,
COUNT(*) AS total_count,
-- ========================================
-- DESCRIPTION COVERAGE
-- ========================================
COUNT(
CASE
WHEN (description IS NOT NULL AND description <> '')
OR (user_description IS NOT NULL AND user_description <> '')
THEN 1
END
) AS with_description,
COUNT(
CASE
WHEN (description IS NULL OR description = '')
AND (user_description IS NULL OR user_description = '')
THEN 1
END
) AS without_description,
-- ========================================
-- CERTIFICATION COVERAGE
-- ========================================
COUNT(
CASE
WHEN LOWER(certificate_status) = 'verified'
THEN 1
END
) AS certified,
COUNT(
CASE
WHEN LOWER(certificate_status) != 'verified'
OR certificate_status IS NULL
THEN 1
END
) AS uncertified,
-- ========================================
-- TAG COVERAGE
-- ========================================
COUNT(
CASE
WHEN (tr.tag_count > 0)
THEN 1
END
) AS with_tags,
COUNT(
CASE
WHEN (tr.tag_count IS NULL OR tr.tag_count = 0)
THEN 1
END
) AS without_tags,
-- ========================================
-- OWNERSHIP COVERAGE
-- ========================================
-- ASSETS has both OWNER_USERS and OWNER_GROUPS fields
-- ========================================
COUNT(
CASE
WHEN (owner_users IS NOT NULL AND ARRAY_SIZE(owner_users) > 0)
OR (owner_groups IS NOT NULL AND ARRAY_SIZE(owner_groups) > 0)
THEN 1
END
) AS with_owners,
COUNT(
CASE
WHEN (owner_users IS NULL OR ARRAY_SIZE(owner_users) = 0)
AND (owner_groups IS NULL OR ARRAY_SIZE(owner_groups) = 0)
THEN 1
END
) AS without_owners,
-- ========================================
-- CUSTOM METADATA ENRICHMENT
-- ========================================
COUNT(
CASE
WHEN (linked_cm_prop_count > 0)
THEN 1
END
) AS with_linked_cm_props,
COUNT(
CASE
WHEN (linked_cm_prop_count = 0 OR linked_cm_prop_count IS NULL)
THEN 1
END
) AS without_linked_cm_props
FROM gold.assets alt
LEFT JOIN tag_stats tr
ON alt.guid = tr.guid
LEFT JOIN cm_stats cm
ON alt.guid = cm.asset_guid
WHERE
-- ========================================
-- ASSET TYPE FILTER (FIXED)
-- ========================================
-- Focus on key asset types for governance reporting
-- 🔧 CUSTOMIZE: Add/remove asset types as needed
-- ========================================
asset_type IN (
'Table',
'Schema',
'TableauDashboard',
'TableauWorkbook',
'DataDomain',
'DataProduct',
'AtlasGlossaryTerm',
'AtlasGlossaryCategory',
'AtlasGlossary'
)
GROUP BY asset_type
)
-- ============================================
-- STEP 3: Generate enrichment scorecard
-- ============================================
-- Output enrichment metrics with coverage percentages
-- for executive reporting and governance dashboards
-- ============================================
SELECT
asset_type,
total_count,
-- Description metrics
with_description,
without_description,
ROUND((with_description * 100.0) / NULLIF(total_count, 0), 2) AS description_coverage_pct,
-- Tag metrics
with_tags,
without_tags,
ROUND((with_tags * 100.0) / NULLIF(total_count, 0), 2) AS tag_coverage_pct,
-- Certification metrics
certified,
uncertified,
ROUND((certified * 100.0) / NULLIF(total_count, 0), 2) AS certification_coverage_pct,
-- Ownership metrics
with_owners,
without_owners,
ROUND((with_owners * 100.0) / NULLIF(total_count, 0), 2) AS ownership_coverage_pct,
-- Custom Metadata metrics
with_linked_cm_props,
without_linked_cm_props,
ROUND((with_linked_cm_props * 100.0) / NULLIF(total_count, 0), 2) AS custom_metadata_coverage_pct
FROM entity_stats
ORDER BY asset_type;
-- ============================================
-- ASSET COMPLETENESS & ENRICHMENT TRACKING
-- ============================================
WITH tag_stats AS (
-- Tag coverage from entity_metadata.TagRelationship
SELECT guid, COUNT(*) AS tag_count
FROM entity_metadata.TagRelationship
WHERE status = 'ACTIVE'
GROUP BY guid
),
cm_stats AS (
SELECT
alt.guid AS asset_guid,
SUM(
CASE
WHEN (
array_contains(array('XX'), 'ALL') -- Use 'ALL' to include all CM sets
OR cm.customMetadataSetName IN (
-- 🔧 CUSTOMIZE: List your Custom Metadata set names
SELECT value
FROM (SELECT explode(array('AI Readiness')) AS value)
)
)
THEN
CASE
WHEN cm.attributeValue IS NULL THEN 0
-- Check if value is a JSON array by looking for '[' at start
-- and checking if it can be parsed
WHEN TRIM(cm.attributeValue) LIKE '[%]'
AND get_json_object(cm.attributeValue, '$[0]') IS NOT NULL
THEN 1
-- Non-array values that are not null
WHEN cm.attributeValue IS NOT NULL
AND TRIM(cm.attributeValue) <> ''
AND TRIM(cm.attributeValue) NOT LIKE '[%]'
THEN 1
ELSE 0
END
ELSE 0
END
) AS linked_cm_prop_count
FROM gold.assets alt
LEFT JOIN entity_metadata.CustomMetadata cm
ON alt.guid = cm.GUID
WHERE TRUE
GROUP BY alt.guid
),
entity_stats AS (
SELECT
asset_type,
COUNT(*) AS total_count,
-- Description coverage
COUNT(CASE WHEN (description IS NOT NULL AND description <> '') OR (user_description IS NOT NULL AND user_description <> '') THEN 1 END) AS with_description,
COUNT(CASE WHEN (description IS NULL OR description = '') AND (user_description IS NULL OR user_description = '') THEN 1 END) AS without_description,
-- Certification coverage
COUNT(CASE WHEN LOWER(certificate_status) = 'verified' THEN 1 END) AS certified,
COUNT(CASE WHEN LOWER(certificate_status) != 'verified' OR certificate_status IS NULL THEN 1 END) AS uncertified,
-- Tag coverage
COUNT(CASE WHEN tr.tag_count > 0 THEN 1 END) AS with_tags,
COUNT(CASE WHEN tr.tag_count IS NULL OR tr.tag_count = 0 THEN 1 END) AS without_tags,
-- Ownership coverage (OWNER_USERS and OWNER_GROUPS)
COUNT(CASE WHEN (owner_users IS NOT NULL AND size(owner_users) > 0) OR (owner_groups IS NOT NULL AND size(owner_groups) > 0) THEN 1 END) AS with_owners,
COUNT(CASE WHEN (owner_users IS NULL OR size(owner_users) = 0) AND (owner_groups IS NULL OR size(owner_groups) = 0) THEN 1 END) AS without_owners,
-- Custom Metadata coverage
COUNT(CASE WHEN linked_cm_prop_count > 0 THEN 1 END) AS with_linked_cm_props,
COUNT(CASE WHEN linked_cm_prop_count = 0 OR linked_cm_prop_count IS NULL THEN 1 END) AS without_linked_cm_props
FROM gold.assets alt
LEFT JOIN tag_stats tr
ON alt.guid = tr.guid
LEFT JOIN cm_stats cm
ON alt.guid = cm.asset_guid
WHERE
asset_type IN (
'Table',
'Schema',
'TableauDashboard',
'TableauWorkbook',
'DataDomain',
'DataProduct',
'AtlasGlossaryTerm',
'AtlasGlossaryCategory',
'AtlasGlossary'
)
GROUP BY asset_type
)
SELECT
asset_type,
total_count,
-- Description metrics
with_description,
without_description,
ROUND((with_description * 100.0) / NULLIF(total_count, 0), 2) AS description_coverage_pct,
-- Tag metrics
with_tags,
without_tags,
ROUND((with_tags * 100.0) / NULLIF(total_count, 0), 2) AS tag_coverage_pct,
-- Certification metrics
certified,
uncertified,
ROUND((certified * 100.0) / NULLIF(total_count, 0), 2) AS certification_coverage_pct,
-- Ownership metrics
with_owners,
without_owners,
ROUND((with_owners * 100.0) / NULLIF(total_count, 0), 2) AS ownership_coverage_pct,
-- Custom Metadata metrics
with_linked_cm_props,
without_linked_cm_props,
ROUND((with_linked_cm_props * 100.0) / NULLIF(total_count, 0), 2) AS custom_metadata_coverage_pct
FROM entity_stats
ORDER BY asset_type;
-- ============================================
-- ASSET COMPLETENESS & ENRICHMENT TRACKING
-- ============================================
WITH tag_stats AS (
-- Tag coverage from entity_metadata.TagRelationship
SELECT guid, COUNT(*) AS tag_count
FROM entity_metadata.TagRelationship
WHERE status = 'ACTIVE'
GROUP BY guid
),
cm_stats AS (
SELECT
alt.guid AS asset_guid,
SUM(
CASE
WHEN (
'ALL' IN UNNEST(['XX']) -- Use 'ALL' to include all CM sets
OR cm.customMetadataSetName IN (
-- 🔧 CUSTOMIZE: List your Custom Metadata set names
SELECT value
FROM UNNEST(['AI Readiness']) AS value
)
)
THEN
CASE
WHEN cm.attributeValue IS NULL THEN 0
-- Handle array-type CM attributes
WHEN SAFE.PARSE_JSON(cm.attributeValue) IS NOT NULL
AND JSON_TYPE(SAFE.PARSE_JSON(cm.attributeValue)) = 'array'
THEN
CASE
WHEN ARRAY_LENGTH(JSON_EXTRACT_ARRAY(cm.attributeValue)) > 0
THEN 1
ELSE 0
END
-- Handle non-array CM attributes with values
WHEN cm.attributeValue IS NOT NULL
THEN 1
ELSE 0
END
ELSE 0
END
) AS linked_cm_prop_count
FROM gold.assets alt
LEFT JOIN entity_metadata.CustomMetadata cm
ON alt.guid = cm.GUID
WHERE TRUE
GROUP BY alt.guid
),
entity_stats AS (
SELECT
asset_type,
COUNT(*) AS total_count,
-- Description coverage
COUNTIF((description IS NOT NULL AND description <> '') OR (user_description IS NOT NULL AND user_description <> '')) AS with_description,
COUNTIF((description IS NULL OR description = '') AND (user_description IS NULL OR user_description = '')) AS without_description,
-- Certification coverage
COUNTIF(LOWER(certificate_status) = 'verified') AS certified,
COUNTIF(LOWER(certificate_status) != 'verified' OR certificate_status IS NULL) AS uncertified,
-- Tag coverage
COUNTIF(tr.tag_count > 0) AS with_tags,
COUNTIF(tr.tag_count IS NULL OR tr.tag_count = 0) AS without_tags,
-- Ownership coverage (OWNER_USERS and OWNER_GROUPS)
COUNTIF((owner_users IS NOT NULL AND ARRAY_LENGTH(owner_users) > 0) OR (owner_groups IS NOT NULL AND ARRAY_LENGTH(owner_groups) > 0)) AS with_owners,
COUNTIF((owner_users IS NULL OR ARRAY_LENGTH(owner_users) = 0) AND (owner_groups IS NULL OR ARRAY_LENGTH(owner_groups) = 0)) AS without_owners,
-- Custom Metadata coverage
COUNTIF(linked_cm_prop_count > 0) AS with_linked_cm_props,
COUNTIF(linked_cm_prop_count = 0 OR linked_cm_prop_count IS NULL) AS without_linked_cm_props
FROM gold.assets alt
LEFT JOIN tag_stats tr
ON alt.guid = tr.guid
LEFT JOIN cm_stats cm
ON alt.guid = cm.asset_guid
WHERE
asset_type IN (
'Table',
'Schema',
'TableauDashboard',
'TableauWorkbook',
'DataDomain',
'DataProduct',
'AtlasGlossaryTerm',
'AtlasGlossaryCategory',
'AtlasGlossary'
)
GROUP BY asset_type
)
SELECT
asset_type,
total_count,
-- Description metrics
with_description,
without_description,
ROUND((with_description * 100.0) / NULLIF(total_count, 0), 2) AS description_coverage_pct,
-- Tag metrics
with_tags,
without_tags,
ROUND((with_tags * 100.0) / NULLIF(total_count, 0), 2) AS tag_coverage_pct,
-- Certification metrics
certified,
uncertified,
ROUND((certified * 100.0) / NULLIF(total_count, 0), 2) AS certification_coverage_pct,
-- Ownership metrics
with_owners,
without_owners,
ROUND((with_owners * 100.0) / NULLIF(total_count, 0), 2) AS ownership_coverage_pct,
-- Custom Metadata metrics
with_linked_cm_props,
without_linked_cm_props,
ROUND((with_linked_cm_props * 100.0) / NULLIF(total_count, 0), 2) AS custom_metadata_coverage_pct
FROM entity_stats
ORDER BY asset_type;
Sample output
| ASSET_TYPE | TOTAL_COUNT | WITH_DESCRIPTION | WITHOUT_DESCRIPTION | DESCRIPTION_COVERAGE_PCT | WITH_TAGS | WITHOUT_TAGS | TAG_COVERAGE_PCT | CERTIFIED | UNCERTIFIED | CERTIFICATION_COVERAGE_PCT | WITH_OWNERS | WITHOUT_OWNERS | OWNERSHIP_COVERAGE_PCT | WITH_LINKED_CM_PROPS | WITHOUT_LINKED_CM_PROPS | CUSTOM_METADATA_COVERAGE_PCT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AtlasGlossary | 96 | 71 | 25 | 73.96 | 0 | 96 | 0.00 | 47 | 49 | 48.96 | 27 | 69 | 28.13 | 0 | 96 | 0.00 |
| AtlasGlossaryCategory | 633 | 463 | 170 | 73.14 | 16 | 617 | 2.53 | 163 | 470 | 25.75 | 91 | 542 | 14.38 | 0 | 633 | 0.00 |
| AtlasGlossaryTerm | 4526 | 4407 | 119 | 97.37 | 76 | 4450 | 1.68 | 495 | 4031 | 10.94 | 270 | 4256 | 5.97 | 2 | 4524 | 0.04 |
| DataDomain | 115 | 56 | 59 | 48.70 | 0 | 115 | 0.00 | 0 | 115 | 0.00 | 114 | 1 | 99.13 | 0 | 115 | 0.00 |
| DataProduct | 136 | 101 | 35 | 74.26 | 49 | 87 | 36.03 | 45 | 91 | 33.09 | 126 | 10 | 92.65 | 0 | 136 | 0.00 |
| Schema | 587 | 38 | 549 | 6.47 | 5 | 582 | 0.85 | 5 | 582 | 0.85 | 31 | 556 | 5.28 | 0 | 587 | 0.00 |
| Table | 24771 | 725 | 24046 | 2.93 | 17620 | 7151 | 71.13 | 17609 | 7162 | 71.09 | 703 | 24068 | 2.84 | 9 | 24762 | 0.04 |
| TableauDashboard | 23 | 23 | 0 | 100.00 | 23 | 0 | 100.00 | 17 | 6 | 73.91 | 19 | 4 | 82.61 | 0 | 23 | 0.00 |
| TableauWorkbook | 17 | 13 | 4 | 76.47 | 17 | 0 | 100.00 | 11 | 6 | 64.71 | 15 | 2 | 88.24 | 0 | 17 | 0.00 |
Metadata completeness by data domain
This query calculates metadata enrichment statistics for all assets, grouped by their assigned Data Domains. It provides a domain-level view of metadata completeness across four key dimensions:
- Tags: Classification metadata coverage within each domain
- Glossary Terms: Business terminology assignments by domain
- Readme: Documentation coverage per domain
- Descriptions: Asset description completeness by domain
The query includes a special category for assets that aren't assigned to any domain, allowing you to identify and track unassigned assets that may need domain assignment as part of your data mesh governance strategy.
Update placeholders such as <db_name> and custom metadata names to match your environment. This query tracks enrichment coverage across asset types and is flexible. Adjust the asset type filters or add filters such as data source, domain, or platform as needed.
- Snowflake
- Databricks
- BigQuery
-- Calculate metadata enrichment with multiple filters applied
WITH tag_stats AS (
-- Tag coverage from entity_metadata.TagRelationship
SELECT guid, COUNT(*) AS tag_count
FROM entity_metadata.TagRelationship
WHERE status = 'ACTIVE'
GROUP BY guid
),
domains AS (
-- Get all Data Domains with their names from ASSETS table
-- FILTER 1: Optionally filter to specific domains
SELECT
a.guid AS domain_guid,
a.asset_name AS domain_name
FROM gold.assets a
WHERE a.asset_type = 'DataDomain'
AND a.status = 'ACTIVE'
-- Uncomment to filter by specific domains:
-- AND a.asset_name IN ('Sales Domain', 'Marketing Domain', 'Engineering Domain')
),
assets_with_enrichment AS (
SELECT
COALESCE(d.domain_guid, 'UNASSIGNED') AS domain_guid,
COALESCE(d.domain_name, 'No Domain Assigned') AS domain_name,
a.guid AS asset_guid,
a.asset_name,
a.asset_type,
CASE
WHEN tr.tag_count > 0
THEN 1
ELSE 0
END AS has_tags,
CASE
WHEN a.term_guids IS NOT NULL AND ARRAY_SIZE(a.term_guids) > 0
THEN 1
ELSE 0
END AS has_terms,
CASE
WHEN a.readme_guid IS NOT NULL
THEN 1
ELSE 0
END AS has_readme,
CASE
WHEN (a.description IS NOT NULL AND LENGTH(TRIM(a.description)) > 0)
OR (a.user_description IS NOT NULL AND LENGTH(TRIM(a.user_description)) > 0)
THEN 1
ELSE 0
END AS has_description
FROM gold.assets a
LEFT JOIN tag_stats tr
ON a.guid = tr.guid
LEFT JOIN data_mesh_details dmd
ON a.guid = dmd.guid
LEFT JOIN domains d
ON dmd.data_domain = d.domain_guid
WHERE a.status = 'ACTIVE'
-- FILTER 2: Optionally filter by specific asset types
-- Uncomment to include only specific asset types:
-- AND a.asset_type IN ('Table', 'View', 'Column')
-- FILTER 3: Optionally filter by connector type
-- Uncomment to filter by specific connector:
-- AND a.connector_name = 'snowflake'
-- FILTER 4: Optionally filter by certification status
-- Uncomment to include only verified assets:
-- AND a.certificate_status = 'VERIFIED'
-- FILTER 5: Optionally exclude unassigned assets
-- Uncomment to exclude assets without domain assignment:
-- AND dmd.data_domain IS NOT NULL
-- FILTER 6: Optionally show ONLY unassigned assets
-- Uncomment to show only unassigned assets:
-- AND (dmd.guid IS NULL OR dmd.data_domain IS NULL)
-- FILTER 7: Optionally filter by creation date
-- Uncomment to filter by assets created after a specific date (epoch milliseconds):
-- AND a.created_at >= 1640995200000 -- Example: January 1, 2022
-- FILTER 8: Optionally filter by specific owner
-- Uncomment to filter by assets owned by specific users:
-- AND ARRAY_CONTAINS('john.doe'::VARIANT, a.owner_users)
)
SELECT
domain_name,
domain_guid,
COUNT(*) AS total_assets,
SUM(has_tags) AS assets_with_tags,
ROUND((SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_tags,
SUM(has_terms) AS assets_with_terms,
ROUND((SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_terms,
SUM(has_readme) AS assets_with_readme,
ROUND((SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_readme,
SUM(has_description) AS assets_with_description,
ROUND((SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_description,
ROUND(
(
(SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0)
) / 4.0,
2
) AS overall_enrichment_score
FROM assets_with_enrichment
GROUP BY domain_name, domain_guid
-- FILTER 9: Optionally set minimum enrichment threshold
-- Uncomment to show only domains with enrichment >= 50%:
-- HAVING overall_enrichment_score >= 50.0
-- FILTER 10: Optionally set minimum asset count
-- Uncomment to show only domains with at least 10 assets:
-- HAVING COUNT(*) >= 10
ORDER BY
CASE WHEN domain_guid = 'UNASSIGNED' THEN 1 ELSE 0 END,
overall_enrichment_score DESC;
-- Calculate metadata enrichment with multiple filters applied
WITH tag_stats AS (
-- Tag coverage from entity_metadata.TagRelationship
SELECT guid, COUNT(*) AS tag_count
FROM entity_metadata.TagRelationship
WHERE status = 'ACTIVE'
GROUP BY guid
),
domains AS (
-- Get all Data Domains with their names from ASSETS table
-- FILTER 1: Optionally filter to specific domains
SELECT
a.guid AS domain_guid,
a.asset_name AS domain_name
FROM gold.assets a
WHERE a.asset_type = 'DataDomain'
AND a.status = 'ACTIVE'
-- Uncomment to filter by specific domains:
-- AND a.asset_name IN ('Sales Domain', 'Marketing Domain', 'Engineering Domain')
),
assets_with_enrichment AS (
SELECT
COALESCE(d.domain_guid, 'UNASSIGNED') AS domain_guid,
COALESCE(d.domain_name, 'No Domain Assigned') AS domain_name,
a.guid AS asset_guid,
a.asset_name,
a.asset_type,
CASE
WHEN tr.tag_count > 0
THEN 1
ELSE 0
END AS has_tags,
CASE
WHEN a.term_guids IS NOT NULL AND ARRAY_SIZE(a.term_guids) > 0
THEN 1
ELSE 0
END AS has_terms,
CASE
WHEN a.readme_guid IS NOT NULL
THEN 1
ELSE 0
END AS has_readme,
CASE
WHEN (a.description IS NOT NULL AND LENGTH(TRIM(a.description)) > 0)
OR (a.user_description IS NOT NULL AND LENGTH(TRIM(a.user_description)) > 0)
THEN 1
ELSE 0
END AS has_description
FROM gold.assets a
LEFT JOIN tag_stats tr
ON a.guid = tr.guid
LEFT JOIN data_mesh_details dmd
ON a.guid = dmd.guid
LEFT JOIN domains d
ON dmd.data_domain = d.domain_guid
WHERE a.status = 'ACTIVE'
-- FILTER 2: Optionally filter by specific asset types
-- Uncomment to include only specific asset types:
-- AND a.asset_type IN ('Table', 'View', 'Column')
-- FILTER 3: Optionally filter by connector type
-- Uncomment to filter by specific connector:
-- AND a.connector_name = 'snowflake'
-- FILTER 4: Optionally filter by certification status
-- Uncomment to include only verified assets:
-- AND a.certificate_status = 'VERIFIED'
-- FILTER 5: Optionally exclude unassigned assets
-- Uncomment to exclude assets without domain assignment:
-- AND dmd.data_domain IS NOT NULL
-- FILTER 6: Optionally show ONLY unassigned assets
-- Uncomment to show only unassigned assets:
-- AND (dmd.guid IS NULL OR dmd.data_domain IS NULL)
-- FILTER 7: Optionally filter by creation date
-- Uncomment to filter by assets created after a specific date (epoch milliseconds):
-- AND a.created_at >= 1640995200000 -- Example: January 1, 2022
-- FILTER 8: Optionally filter by specific owner
-- Uncomment to filter by assets owned by specific users:
-- AND ARRAY_CONTAINS('john.doe'::VARIANT, a.owner_users)
)
SELECT
domain_name,
domain_guid,
COUNT(*) AS total_assets,
SUM(has_tags) AS assets_with_tags,
ROUND((SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_tags,
SUM(has_terms) AS assets_with_terms,
ROUND((SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_terms,
SUM(has_readme) AS assets_with_readme,
ROUND((SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_readme,
SUM(has_description) AS assets_with_description,
ROUND((SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_description,
ROUND(
(
(SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0)
) / 4.0,
2
) AS overall_enrichment_score
FROM assets_with_enrichment
GROUP BY domain_name, domain_guid
-- FILTER 9: Optionally set minimum enrichment threshold
-- Uncomment to show only domains with enrichment >= 50%:
-- HAVING overall_enrichment_score >= 50.0
-- FILTER 10: Optionally set minimum asset count
-- Uncomment to show only domains with at least 10 assets:
-- HAVING COUNT(*) >= 10
ORDER BY
CASE WHEN domain_guid = 'UNASSIGNED' THEN 1 ELSE 0 END,
overall_enrichment_score DESC;
SET @@dataset_id = 'ATLAN_GOLD';
-- 1. Metadata enrichment tracking
WITH tag_stats AS (
-- Tag coverage from entity_metadata.TagRelationship
SELECT guid, COUNT(*) AS tag_count
FROM entity_metadata.TagRelationship
WHERE status = 'ACTIVE'
GROUP BY guid
),
domains AS (
SELECT
a.guid AS domain_guid,
a.asset_name AS domain_name
FROM gold.assets a
WHERE a.asset_type = 'DataDomain'
AND a.status = 'ACTIVE'
),
assets_with_enrichment AS (
SELECT
COALESCE(d.domain_guid, 'UNASSIGNED') AS domain_guid,
COALESCE(d.domain_name, 'No Domain Assigned') AS domain_name,
a.guid AS asset_guid,
a.asset_name,
a.asset_type,
CASE
WHEN tr.tag_count > 0
THEN 1
ELSE 0
END AS has_tags,
CASE
WHEN a.term_guids IS NOT NULL AND ARRAY_LENGTH(a.term_guids) > 0
THEN 1
ELSE 0
END AS has_terms,
CASE
WHEN a.readme_guid IS NOT NULL
THEN 1
ELSE 0
END AS has_readme,
CASE
WHEN (a.description IS NOT NULL AND LENGTH(TRIM(a.description)) > 0)
OR (a.user_description IS NOT NULL AND LENGTH(TRIM(a.user_description)) > 0)
THEN 1
ELSE 0
END AS has_description
FROM gold.assets a
LEFT JOIN tag_stats tr
ON a.guid = tr.guid
LEFT JOIN data_mesh_details dmd
ON a.guid = dmd.guid
LEFT JOIN domains d
ON dmd.data_domain = d.domain_guid
WHERE a.status = 'ACTIVE'
-- Example: Filter by owner
-- AND 'john.doe' IN UNNEST(a.owner_users)
-- Example: Filter by date (last 90 days)
-- AND a.created_at >= UNIX_MILLIS(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 -- DAY))
)
SELECT
domain_name,
domain_guid,
COUNT(*) AS total_assets,
SUM(has_tags) AS assets_with_tags,
ROUND((SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_tags,
SUM(has_terms) AS assets_with_terms,
ROUND((SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_terms,
SUM(has_readme) AS assets_with_readme,
ROUND((SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_readme,
SUM(has_description) AS assets_with_description,
ROUND((SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_description,
ROUND(
(
(SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0)
) / 4.0,
2
) AS overall_enrichment_score
FROM assets_with_enrichment
GROUP BY domain_name, domain_guid;
Sample output
This sample output shows enrichment coverage by data domain. Use it to compare tag, term, readme, and description completeness across domains and spot gaps.
Sample output table
| DOMAIN_NAME | DOMAIN_GUID | TOTAL_ASSETS | ASSETS_WITH_TAGS | PCT_WITH_TAGS | ASSETS_WITH_TERMS | PCT_WITH_TERMS | ASSETS_WITH_README | PCT_WITH_README | ASSETS_WITH_DESCRIPTION | PCT_WITH_DESCRIPTION | OVERALL_ENRICHMENT_SCORE |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Rail | 9189e4d3-02b9-4318-b168-6e634ac535d0 | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Workforce | 3094ce14-acf0-48fa-9b25-bd72c7d6a82e | 2 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 100.00 |
| Customer 360 | e24235f6-ef1e-4884-81a4-f5f76934b8b7 | 2 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 100.00 |
| Clinical Development | 09e693cb-49e7-4b14-b142-31812a39a8ae | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Phase II Studies | e6ecf919-bfeb-477b-91ca-bdaa55130f58 | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Source Data Verification Tracking | daa341f8-45c0-492b-a8d7-1a8bc03aca72 | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Investment Management | 75f0e00c-e1e3-4d45-84f1-c361cc13a4dd | 2 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 2 | 100.00 | 100.00 |
| Fraud | 9d22eaaf-5890-458b-891f-e56c3f3605bb | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Wealth Management | ce5f44eb-ab1f-4a3e-aa23-0ad18083b95c | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Medicine | 1d522bd6-18d4-4a06-a7ba-71cf4e1526bf | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 100.00 |
| Phase III Studies | 86d23a7f-ea1b-458e-8796-0e9431f29742 | 2 | 2 | 100.00 | 1 | 50.00 | 2 | 100.00 | 2 | 100.00 | 87.50 |
| Order Management | c26940ea-7d5a-4cf7-af99-55494916bdca | 5 | 4 | 80.00 | 4 | 80.00 | 4 | 80.00 | 5 | 100.00 | 85.00 |
| Study Registry | af9dd2d0-5272-4e45-8ba9-0e30bb2861c2 | 1 | 1 | 100.00 | 1 | 100.00 | 0 | 0.00 | 1 | 100.00 | 75.00 |
| Risk | 37ee7bde-3f17-4b5e-b210-b5bc5226e72c | 1 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 0 | 0.00 | 75.00 |
| Human Resources (HR) | 431c152d-bb84-4bc7-9d20-9c98ba9c0c9b | 1 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Insights, Analytics, & Operations | 451bfe73-13d0-4795-8bc6-d5ab148ae3ac | 1 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Commerical | d5de0399-2e82-4eb5-9241-ca595d5c87f0 | 1 | 1 | 100.00 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Flight Operations | 7d9b3ee7-1873-4378-b479-9a5d223a30b2 | 1 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Fin | 1266cb08-685e-444e-9513-ef4c6ab87a48 | 1 | 1 | 100.00 | 1 | 100.00 | 0 | 0.00 | 1 | 100.00 | 75.00 |
| Finance | 5431ea4b-d00b-41af-9167-18d4da7df767 | 1 | 1 | 100.00 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Marketing | ae9d175a-1963-43df-8634-6cf6d450bdfe | 1 | 0 | 0.00 | 1 | 100.00 | 1 | 100.00 | 1 | 100.00 | 75.00 |
| Operations | 8e053ae6-778a-45c2-b664-4f7ea3c1b201 | 6 | 4 | 66.67 | 4 | 66.67 | 4 | 66.67 | 5 | 83.33 | 70.83 |
| Finance | 83ed2107-d2ce-4da8-aa1a-6a23e91576cb | 8 | 5 | 62.50 | 5 | 62.50 | 5 | 62.50 | 7 | 87.50 | 68.75 |
| Forecasting | 802ab2e5-7e49-445f-abf6-7e41c6bf467a | 5 | 2 | 40.00 | 3 | 60.00 | 3 | 60.00 | 5 | 100.00 | 65.00 |
| Sales | c55dc63b-bbf6-4d89-80d7-87372c2b4e3d | 2 | 1 | 50.00 | 1 | 50.00 | 1 | 50.00 | 2 | 100.00 | 62.50 |
| Services | 7c901dcb-c0d0-4034-98e8-280a34e1f1eb | 3 | 1 | 33.33 | 1 | 33.33 | 2 | 66.67 | 3 | 100.00 | 58.33 |
| Customer | a150d59c-964f-4edf-ad67-2d800021769f | 10 | 5 | 50.00 | 3 | 30.00 | 5 | 50.00 | 10 | 100.00 | 57.50 |
| Capital Markets | 72d12e25-ac65-4d3e-aeb3-f465b6a065cb | 2 | 1 | 50.00 | 1 | 50.00 | 0 | 0.00 | 2 | 100.00 | 50.00 |
| Machine Learning | 936da2c9-61d9-4c3b-8220-33940d5ee20d | 4 | 1 | 25.00 | 1 | 25.00 | 1 | 25.00 | 2 | 50.00 | 31.25 |
| Occupancy | a47f32e4-d26c-4342-9419-a737263e299d | 1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 1 | 100.00 | 25.00 |
| Value based care | d1187d88-3220-45ad-828c-e92023d4c91f | 1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 1 | 100.00 | 25.00 |
| AirFare Calculator | e935fd96-1ada-4d7c-9a46-240a31959bb6 | 1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 |
| Cross-Functional Reporting | a105956e-5312-4e81-b518-b88487621e80 | 1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 |
| Racing | f2d36c28-19b2-4e44-a341-59e706f0244e | 2 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 |
| Food & Beverage | 526de60b-aa52-41b5-84fa-ed837f6add3b | 2 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 |
| No Domain Assigned | UNASSIGNED | 912271 | 109422 | 11.99 | 665 | 0.07 | 367 | 0.04 | 372862 | 40.87 | 13.24 |