Skip to main content

Metadata completeness tracking

Organizations rely on a metadata catalog like Atlan to make data discoverable, trustworthy, and reusable across teams. As metadata is harvested from different sources, enrichment such as descriptions, owners, tags, certifications, and custom metadata can be inconsistent or incomplete. Use Lakehouse to track enrichment coverage so you can identify governance gaps, prioritize cleanup efforts, and continuously improve the quality and usability of your data assets.

Before you begin

Before you run these queries, make sure:

Metadata completeness

This query measures metadata completeness across asset types by analyzing how many assets have descriptions, tags, certifications, owners, and Custom Metadata (for example, AI Readiness). It produces a governance scorecard showing enrichment gaps.

-- ============================================
-- ASSET COMPLETENESS & ENRICHMENT TRACKING
-- ============================================
-- Purpose:
-- Measure metadata completeness and governance maturity
-- across asset types by calculating:
-- • Description coverage (documented vs. undocumented)
-- • Tag coverage (tagged vs. untagged)
-- • Certification coverage (verified vs. uncertified)
-- • Ownership coverage (owned vs. orphaned)
-- • Custom Metadata enrichment (enriched vs. not enriched)
--
-- Output: Governance scorecard for catalog quality assessment
-- ============================================

WITH cm_stats AS (
-- --------------------------------------------
-- STEP 1: Calculate Custom Metadata enrichment
-- --------------------------------------------
-- For each asset, count how many Custom Metadata
-- properties are populated. This measures specialized
-- enrichment like AI Readiness, Data Quality, etc.
--
-- 🔧 CUSTOMIZE: Update the CM name filter in FLATTEN
-- to match your organization's Custom Metadata sets
-- --------------------------------------------
SELECT
alt.GUID AS asset_guid,
SUM(
-- Only count CM properties for specified CM sets
CASE
WHEN (
ARRAY_CONTAINS('ALL'::VARIANT, ARRAY_CONSTRUCT('XX')) -- Use 'ALL' to include all CM sets
OR cm.CUSTOM_METADATA_NAME IN (
-- 🔧 CUSTOMIZE: List your Custom Metadata set names here
SELECT value::STRING
FROM TABLE(FLATTEN(input => ARRAY_CONSTRUCT('AI Readiness')))
)
)
THEN
-- Check if the CM attribute has a non-null, non-empty value
CASE
WHEN cm.ATTRIBUTE_VALUE IS NULL THEN 0

-- Handle array-type CM attributes
WHEN IS_ARRAY(TRY_PARSE_JSON(cm.ATTRIBUTE_VALUE))
THEN
CASE
WHEN ARRAY_SIZE(TRY_PARSE_JSON(cm.ATTRIBUTE_VALUE)) > 0
THEN 1
ELSE 0
END

-- Handle non-array CM attributes with values
WHEN TYPEOF(TRY_PARSE_JSON(cm.ATTRIBUTE_VALUE)) <> 'ARRAY'
AND cm.ATTRIBUTE_VALUE IS NOT NULL
THEN 1

ELSE 0
END
ELSE 0
END
) AS linked_cm_prop_count
FROM ASSETS alt
LEFT JOIN CUSTOM_METADATA cm
ON alt.GUID = cm.ASSET_GUID
WHERE 1=1
GROUP BY alt.GUID
),

entity_stats AS (
-- --------------------------------------------
-- STEP 2: Calculate enrichment metrics per asset type
-- --------------------------------------------
-- For each asset type, count assets with/without:
-- • Descriptions
-- • Tags
-- • Certifications
-- • Owners
-- • Custom Metadata
-- --------------------------------------------
SELECT
ASSET_TYPE,
COUNT(*) AS total_count,

-- ========================================
-- DESCRIPTION COVERAGE
-- ========================================
COUNT(
CASE
WHEN (DESCRIPTION IS NOT NULL AND DESCRIPTION <> '')
THEN 1
END
) AS with_description,

COUNT(
CASE
WHEN (DESCRIPTION IS NULL OR DESCRIPTION = '')
THEN 1
END
) AS without_description,

-- ========================================
-- CERTIFICATION COVERAGE
-- ========================================
COUNT(
CASE
WHEN LOWER(CERTIFICATE_STATUS) = 'verified'
THEN 1
END
) AS certified,

COUNT(
CASE
WHEN LOWER(CERTIFICATE_STATUS) != 'verified'
OR CERTIFICATE_STATUS IS NULL
THEN 1
END
) AS uncertified,

-- ========================================
-- TAG COVERAGE
-- ========================================
COUNT(
CASE
WHEN (TAGS IS NOT NULL AND ARRAY_SIZE(TAGS) > 0)
THEN 1
END
) AS with_tags,

COUNT(
CASE
WHEN (TAGS IS NULL OR ARRAY_SIZE(TAGS) = 0)
THEN 1
END
) AS without_tags,

-- ========================================
-- OWNERSHIP COVERAGE
-- ========================================
-- Note: ASSETS schema only has OWNER_USERS field
-- There is no OWNER_GROUPS field in the schema
-- ========================================
COUNT(
CASE
WHEN (OWNER_USERS IS NOT NULL AND ARRAY_SIZE(OWNER_USERS) > 0)
THEN 1
END
) AS with_owners,

COUNT(
CASE
WHEN (OWNER_USERS IS NULL OR ARRAY_SIZE(OWNER_USERS) = 0)
THEN 1
END
) AS without_owners,

-- ========================================
-- CUSTOM METADATA ENRICHMENT
-- ========================================
COUNT(
CASE
WHEN (linked_cm_prop_count > 0)
THEN 1
END
) AS with_linked_cm_props,

COUNT(
CASE
WHEN (linked_cm_prop_count = 0 OR linked_cm_prop_count IS NULL)
THEN 1
END
) AS without_linked_cm_props

FROM ASSETS alt
LEFT JOIN cm_stats cm
ON alt.GUID = cm.asset_guid
WHERE
-- ========================================
-- ASSET TYPE FILTER (FIXED)
-- ========================================
-- Focus on key asset types for governance reporting
-- 🔧 CUSTOMIZE: Add/remove asset types as needed
-- ========================================
ASSET_TYPE IN (
'Table',
'Schema',
'TableauDashboard',
'TableauWorkbook',
'DataDomain',
'DataProduct',
'AtlasGlossaryTerm',
'AtlasGlossaryCategory',
'AtlasGlossary'
)
GROUP BY ASSET_TYPE
)

-- ============================================
-- STEP 3: Generate enrichment scorecard
-- ============================================
-- Output enrichment metrics with coverage percentages
-- for executive reporting and governance dashboards
-- ============================================
SELECT
ASSET_TYPE,
total_count,

-- Description metrics
with_description,
without_description,
ROUND((with_description * 100.0) / NULLIF(total_count, 0), 2) AS description_coverage_pct,

-- Tag metrics
with_tags,
without_tags,
ROUND((with_tags * 100.0) / NULLIF(total_count, 0), 2) AS tag_coverage_pct,

-- Certification metrics
certified,
uncertified,
ROUND((certified * 100.0) / NULLIF(total_count, 0), 2) AS certification_coverage_pct,

-- Ownership metrics
with_owners,
without_owners,
ROUND((with_owners * 100.0) / NULLIF(total_count, 0), 2) AS ownership_coverage_pct,

-- Custom Metadata metrics
with_linked_cm_props,
without_linked_cm_props,
ROUND((with_linked_cm_props * 100.0) / NULLIF(total_count, 0), 2) AS custom_metadata_coverage_pct

FROM entity_stats

ORDER BY ASSET_TYPE;

Sample output
ASSET_TYPETOTAL_COUNTWITH_DESCRIPTIONWITHOUT_DESCRIPTIONDESCRIPTION_COVERAGE_PCTWITH_TAGSWITHOUT_TAGSTAG_COVERAGE_PCTCERTIFIEDUNCERTIFIEDCERTIFICATION_COVERAGE_PCTWITH_OWNERSWITHOUT_OWNERSOWNERSHIP_COVERAGE_PCTWITH_LINKED_CM_PROPSWITHOUT_LINKED_CM_PROPSCUSTOM_METADATA_COVERAGE_PCT
AtlasGlossary96712573.960960.00474948.96276928.130960.00
AtlasGlossaryCategory63346317073.14166172.5316347025.759154214.3806330.00
AtlasGlossaryTerm4526440711997.377644501.68495403110.9427042565.97245240.04
DataDomain115565948.7001150.0001150.00114199.1301150.00
DataProduct1361013574.26498736.03459133.091261092.6501360.00
Schema587385496.4755820.8555820.85315565.2805870.00
Table24771725240462.9317620715171.1317609716271.09703240682.849247620.04
TableauDashboard23230100.00230100.0017673.9119482.610230.00
TableauWorkbook1713476.47170100.0011664.7115288.240170.00

Metadata completeness by data domain

This query calculates metadata enrichment statistics for all assets, grouped by their assigned Data Domains. It provides a domain-level view of metadata completeness across four key dimensions:

  • Tags: Classification metadata coverage within each domain
  • Glossary Terms: Business terminology assignments by domain
  • Readme: Documentation coverage per domain
  • Descriptions: Asset description completeness by domain

The query includes a special category for assets that aren't assigned to any domain, allowing you to identify and track unassigned assets that may need domain assignment as part of your data mesh governance strategy.

Update placeholders such as <db_name> and custom metadata names to match your environment. This query tracks enrichment coverage across asset types and is flexible. Adjust the asset type filters or add filters such as data source, domain, or platform as needed.

-- Calculate metadata enrichment with multiple filters applied
WITH domains AS (
-- Get all Data Domains with their names from ASSETS table
-- FILTER 1: Optionally filter to specific domains
SELECT
a.GUID AS domain_guid,
a.ASSET_NAME AS domain_name
FROM ASSETS a
WHERE a.ASSET_TYPE = 'DataDomain'
AND a.STATUS = 'ACTIVE'
-- Uncomment to filter by specific domains:
-- AND a.ASSET_NAME IN ('Sales Domain', 'Marketing Domain', 'Engineering Domain')
),

assets_with_enrichment AS (
SELECT
COALESCE(d.domain_guid, 'UNASSIGNED') AS domain_guid,
COALESCE(d.domain_name, 'No Domain Assigned') AS domain_name,
a.GUID AS asset_guid,
a.ASSET_NAME,
a.ASSET_TYPE,
CASE
WHEN a.TAGS IS NOT NULL AND ARRAY_SIZE(a.TAGS) > 0
THEN 1
ELSE 0
END AS has_tags,
CASE
WHEN a.TERM_GUIDS IS NOT NULL AND ARRAY_SIZE(a.TERM_GUIDS) > 0
THEN 1
ELSE 0
END AS has_terms,
CASE
WHEN a.README_GUID IS NOT NULL
THEN 1
ELSE 0
END AS has_readme,
CASE
WHEN a.DESCRIPTION IS NOT NULL AND LENGTH(TRIM(a.DESCRIPTION)) > 0
THEN 1
ELSE 0
END AS has_description
FROM ASSETS a
LEFT JOIN DATA_MESH_DETAILS dmd
ON a.GUID = dmd.GUID
LEFT JOIN domains d
ON dmd.DATA_DOMAIN = d.domain_guid
WHERE a.STATUS = 'ACTIVE'
-- FILTER 2: Optionally filter by specific asset types
-- Uncomment to include only specific asset types:
-- AND a.ASSET_TYPE IN ('Table', 'View', 'Column')

-- FILTER 3: Optionally filter by connector type
-- Uncomment to filter by specific connector:
-- AND a.CONNECTOR_NAME = 'snowflake'

-- FILTER 4: Optionally filter by certification status
-- Uncomment to include only verified assets:
-- AND a.CERTIFICATE_STATUS = 'VERIFIED'

-- FILTER 5: Optionally exclude unassigned assets
-- Uncomment to exclude assets without domain assignment:
-- AND dmd.DATA_DOMAIN IS NOT NULL

-- FILTER 6: Optionally show ONLY unassigned assets
-- Uncomment to show only unassigned assets:
-- AND (dmd.GUID IS NULL OR dmd.DATA_DOMAIN IS NULL)

-- FILTER 7: Optionally filter by creation date
-- Uncomment to filter by assets created after a specific date (epoch milliseconds):
-- AND a.CREATED_AT >= 1640995200000 -- Example: January 1, 2022

-- FILTER 8: Optionally filter by specific owner
-- Uncomment to filter by assets owned by specific users:
-- AND ARRAY_CONTAINS('john.doe'::VARIANT, a.OWNER_USERS)
)

SELECT
domain_name,
domain_guid,
COUNT(*) AS total_assets,

SUM(has_tags) AS assets_with_tags,
ROUND((SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_tags,

SUM(has_terms) AS assets_with_terms,
ROUND((SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_terms,

SUM(has_readme) AS assets_with_readme,
ROUND((SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_readme,

SUM(has_description) AS assets_with_description,
ROUND((SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0), 2) AS pct_with_description,

ROUND(
(
(SUM(has_tags) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_terms) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_readme) * 100.0) / NULLIF(COUNT(*), 0) +
(SUM(has_description) * 100.0) / NULLIF(COUNT(*), 0)
) / 4.0,
2
) AS overall_enrichment_score

FROM assets_with_enrichment
GROUP BY domain_name, domain_guid

-- FILTER 9: Optionally set minimum enrichment threshold
-- Uncomment to show only domains with enrichment >= 50%:
-- HAVING overall_enrichment_score >= 50.0

-- FILTER 10: Optionally set minimum asset count
-- Uncomment to show only domains with at least 10 assets:
-- HAVING COUNT(*) >= 10

ORDER BY
CASE WHEN domain_guid = 'UNASSIGNED' THEN 1 ELSE 0 END,
overall_enrichment_score DESC;

Sample output

This sample output shows enrichment coverage by data domain. Use it to compare tag, term, readme, and description completeness across domains and spot gaps.

Sample output table
DOMAIN_NAMEDOMAIN_GUIDTOTAL_ASSETSASSETS_WITH_TAGSPCT_WITH_TAGSASSETS_WITH_TERMSPCT_WITH_TERMSASSETS_WITH_READMEPCT_WITH_READMEASSETS_WITH_DESCRIPTIONPCT_WITH_DESCRIPTIONOVERALL_ENRICHMENT_SCORE
Rail9189e4d3-02b9-4318-b168-6e634ac535d011100.001100.001100.001100.00100.00
Workforce3094ce14-acf0-48fa-9b25-bd72c7d6a82e22100.002100.002100.002100.00100.00
Customer 360e24235f6-ef1e-4884-81a4-f5f76934b8b722100.002100.002100.002100.00100.00
Clinical Development09e693cb-49e7-4b14-b142-31812a39a8ae11100.001100.001100.001100.00100.00
Phase II Studiese6ecf919-bfeb-477b-91ca-bdaa55130f5811100.001100.001100.001100.00100.00
Source Data Verification Trackingdaa341f8-45c0-492b-a8d7-1a8bc03aca7211100.001100.001100.001100.00100.00
Investment Management75f0e00c-e1e3-4d45-84f1-c361cc13a4dd22100.002100.002100.002100.00100.00
Fraud9d22eaaf-5890-458b-891f-e56c3f3605bb11100.001100.001100.001100.00100.00
Wealth Managementce5f44eb-ab1f-4a3e-aa23-0ad18083b95c11100.001100.001100.001100.00100.00
Medicine1d522bd6-18d4-4a06-a7ba-71cf4e1526bf11100.001100.001100.001100.00100.00
Phase III Studies86d23a7f-ea1b-458e-8796-0e9431f2974222100.00150.002100.002100.0087.50
Order Managementc26940ea-7d5a-4cf7-af99-55494916bdca5480.00480.00480.005100.0085.00
Study Registryaf9dd2d0-5272-4e45-8ba9-0e30bb2861c211100.001100.0000.001100.0075.00
Risk37ee7bde-3f17-4b5e-b210-b5bc5226e72c11100.001100.001100.0000.0075.00
Human Resources (HR)431c152d-bb84-4bc7-9d20-9c98ba9c0c9b100.001100.001100.001100.0075.00
Insights, Analytics, & Operations451bfe73-13d0-4795-8bc6-d5ab148ae3ac100.001100.001100.001100.0075.00
Commericald5de0399-2e82-4eb5-9241-ca595d5c87f011100.0000.001100.001100.0075.00
Flight Operations7d9b3ee7-1873-4378-b479-9a5d223a30b2100.001100.001100.001100.0075.00
Fin1266cb08-685e-444e-9513-ef4c6ab87a4811100.001100.0000.001100.0075.00
Finance5431ea4b-d00b-41af-9167-18d4da7df76711100.0000.001100.001100.0075.00
Marketingae9d175a-1963-43df-8634-6cf6d450bdfe100.001100.001100.001100.0075.00
Operations8e053ae6-778a-45c2-b664-4f7ea3c1b2016466.67466.67466.67583.3370.83
Finance83ed2107-d2ce-4da8-aa1a-6a23e91576cb8562.50562.50562.50787.5068.75
Forecasting802ab2e5-7e49-445f-abf6-7e41c6bf467a5240.00360.00360.005100.0065.00
Salesc55dc63b-bbf6-4d89-80d7-87372c2b4e3d2150.00150.00150.002100.0062.50
Services7c901dcb-c0d0-4034-98e8-280a34e1f1eb3133.33133.33266.673100.0058.33
Customera150d59c-964f-4edf-ad67-2d800021769f10550.00330.00550.0010100.0057.50
Capital Markets72d12e25-ac65-4d3e-aeb3-f465b6a065cb2150.00150.0000.002100.0050.00
Machine Learning936da2c9-61d9-4c3b-8220-33940d5ee20d4125.00125.00125.00250.0031.25
Occupancya47f32e4-d26c-4342-9419-a737263e299d100.0000.0000.001100.0025.00
Value based cared1187d88-3220-45ad-828c-e92023d4c91f100.0000.0000.001100.0025.00
AirFare Calculatore935fd96-1ada-4d7c-9a46-240a31959bb6100.0000.0000.0000.000.00
Cross-Functional Reportinga105956e-5312-4e81-b518-b88487621e80100.0000.0000.0000.000.00
Racingf2d36c28-19b2-4e44-a341-59e706f0244e200.0000.0000.0000.000.00
Food & Beverage526de60b-aa52-41b5-84fa-ed837f6add3b200.0000.0000.0000.000.00
No Domain AssignedUNASSIGNED91227110942211.996650.073670.0437286240.8713.24