1
0
mirror of https://github.com/esphome/esphome.git synced 2025-11-05 10:12:37 +01:00

[ci] Cache component dependency graph for up to 3.4x faster determine-jobs (#11648)

This commit is contained in:
J. Nick Koston 2025-11-03 22:38:57 -06:00 committed by GitHub
parent 980098ca77
commit 060bb4159f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 341 additions and 3 deletions

View File

@ -192,6 +192,11 @@ jobs:
with:
python-version: ${{ env.DEFAULT_PYTHON }}
cache-key: ${{ needs.common.outputs.cache-key }}
- name: Restore components graph cache
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: .temp/components_graph.json
key: components-graph-${{ hashFiles('esphome/components/**/*.py') }}
- name: Determine which tests to run
id: determine
env:
@ -216,6 +221,12 @@ jobs:
echo "cpp-unit-tests-run-all=$(echo "$output" | jq -r '.cpp_unit_tests_run_all')" >> $GITHUB_OUTPUT
echo "cpp-unit-tests-components=$(echo "$output" | jq -c '.cpp_unit_tests_components')" >> $GITHUB_OUTPUT
echo "component-test-batches=$(echo "$output" | jq -c '.component_test_batches')" >> $GITHUB_OUTPUT
- name: Save components graph cache
if: github.ref == 'refs/heads/dev'
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: .temp/components_graph.json
key: components-graph-${{ hashFiles('esphome/components/**/*.py') }}
integration-tests:
name: Run integration tests

View File

@ -2,6 +2,7 @@ from __future__ import annotations
from collections.abc import Callable
from functools import cache
import hashlib
import json
import os
import os.path
@ -52,6 +53,10 @@ BASE_BUS_COMPONENTS = {
"remote_receiver",
}
# Cache version for the components graph. It is written to the cache file as
# the "_version" field and compared on load; a cache file carrying a different
# version is ignored and the graph is rebuilt.
# Increment this when the cache format or graph building logic changes.
COMPONENTS_GRAPH_CACHE_VERSION = 1
def parse_list_components_output(output: str) -> list[str]:
"""Parse the output from list-components.py script.
@ -756,20 +761,71 @@ def resolve_auto_load(
return auto_load()
@cache
def get_components_graph_cache_key() -> str:
    """Compute a fingerprint of every tracked component Python file.

    The fingerprint is the SHA256 digest of the ``git ls-files -s`` listing
    for ``esphome/components/**/*.py``. Each listing line has the form
    ``<mode> <sha1> <stage> <path>``, so the digest moves whenever a listed
    path or its recorded sha1 changes. Every ``.py`` file is covered, not just
    ``__init__.py``, because AUTO_LOAD, DEPENDENCIES, etc. can be defined in
    any Python file of a component.

    The value is memoized with ``functools.cache``, so git is invoked at most
    once per process unless ``cache_clear()`` is called.

    Returns:
        SHA256 hex string uniquely identifying the current component state

    Raises:
        subprocess.CalledProcessError: if the git command exits non-zero
    """
    # NOTE(review): ``git ls-files -s`` lists index (staged) entries, so
    # unstaged working-tree edits are presumably not reflected in the key -
    # confirm whether that matters for local runs.
    listing = subprocess.run(
        ["git", "ls-files", "-s", "esphome/components/**/*.py"],
        capture_output=True,
        text=True,
        check=True,
        cwd=root_path,
        close_fds=False,
    ).stdout

    # Digest the raw listing (paths plus their sha1 hashes) in one step
    return hashlib.sha256(listing.encode()).hexdigest()
def create_components_graph() -> dict[str, list[str]]:
"""Create a graph of component dependencies.
"""Create a graph of component dependencies (cached).
This function is expensive (5-6 seconds) because it imports all ESPHome components
to extract their DEPENDENCIES and AUTO_LOAD metadata. The result is cached based
on a hash of the git-tracked component Python files, so unchanged components don't trigger a rebuild.
Returns:
Dictionary mapping parent components to their children (dependencies)
"""
from pathlib import Path
# Check cache first - use fixed filename since GitHub Actions cache doesn't support wildcards
cache_file = Path(temp_folder) / "components_graph.json"
if cache_file.exists():
try:
cached_data = json.loads(cache_file.read_text())
except (OSError, json.JSONDecodeError):
# Cache file corrupted or unreadable, rebuild
pass
else:
# Verify cache version matches
if cached_data.get("_version") == COMPONENTS_GRAPH_CACHE_VERSION:
# Verify cache is for current component state
cache_key = get_components_graph_cache_key()
if cached_data.get("_cache_key") == cache_key:
return cached_data.get("graph", {})
# Cache key mismatch - stale cache, rebuild
# Cache version mismatch - incompatible format, rebuild
from esphome import const
from esphome.core import CORE
from esphome.loader import ComponentManifest, get_component, get_platform
# The root directory of the repo
root = Path(__file__).parent.parent
root = Path(root_path)
components_dir = root / ESPHOME_COMPONENTS_PATH
# Fake some directory so that get_component works
CORE.config_path = root
@ -846,6 +902,15 @@ def create_components_graph() -> dict[str, list[str]]:
# restore config
CORE.data[KEY_CORE] = TARGET_CONFIGURATIONS[0]
# Save to cache with version and cache key for validation
cache_data = {
"_version": COMPONENTS_GRAPH_CACHE_VERSION,
"_cache_key": get_components_graph_cache_key(),
"graph": components_graph,
}
cache_file.parent.mkdir(exist_ok=True)
cache_file.write_text(json.dumps(cache_data))
return components_graph

View File

@ -543,6 +543,7 @@ def test_main_filters_components_without_tests(
with (
patch.object(determine_jobs, "root_path", str(tmp_path)),
patch.object(helpers, "root_path", str(tmp_path)),
patch.object(helpers, "create_components_graph", return_value={}),
patch("sys.argv", ["determine-jobs.py"]),
patch.object(
determine_jobs,
@ -640,6 +641,7 @@ def test_main_detects_components_with_variant_tests(
with (
patch.object(determine_jobs, "root_path", str(tmp_path)),
patch.object(helpers, "root_path", str(tmp_path)),
patch.object(helpers, "create_components_graph", return_value={}),
patch("sys.argv", ["determine-jobs.py"]),
patch.object(
determine_jobs,

View File

@ -1,5 +1,6 @@
"""Unit tests for script/helpers.py module."""
from collections.abc import Generator
import json
import os
from pathlib import Path
@ -1106,3 +1107,262 @@ def test_get_component_from_path(
"""Test extraction of component names from file paths."""
result = helpers.get_component_from_path(file_path)
assert result == expected_component
# Components graph cache tests
@pytest.fixture
def mock_git_output() -> str:
    """Provide canned ``git ls-files -s`` output for a few component files.

    The listing deliberately mixes sensor.py and binary_sensor.py entries with
    the __init__.py ones: AUTO_LOAD can be declared in those files, which is
    why all .py files are hashed, not just __init__.py.
    """
    # (blob hash placeholder, tracked path) pairs rendered one per line
    entries = (
        ("abc123...", "esphome/components/wifi/__init__.py"),
        ("def456...", "esphome/components/api/__init__.py"),
        ("ghi789...", "esphome/components/xiaomi_lywsd03mmc/__init__.py"),
        ("jkl012...", "esphome/components/xiaomi_lywsd03mmc/sensor.py"),
        ("mno345...", "esphome/components/xiaomi_cgpr1/__init__.py"),
        ("pqr678...", "esphome/components/xiaomi_cgpr1/binary_sensor.py"),
    )
    return "".join(f"100644 {sha} 0 {path}\n" for sha, path in entries)
@pytest.fixture
def mock_cache_file(tmp_path: Path) -> Path:
    """Return the path of components_graph.json inside the per-test tmp dir.

    Only the path is built; the file itself is not created.
    """
    return tmp_path.joinpath("components_graph.json")
@pytest.fixture(autouse=True)
def clear_cache_key_cache() -> Generator[None, None, None]:
    """Clear the memoized components graph cache key around each test.

    get_components_graph_cache_key is wrapped in functools.cache, so a key
    computed from mocked git output would otherwise stay memoized. Clearing
    before the test gives it a clean slate; clearing again on teardown keeps a
    key derived from a mock from leaking into tests that run afterwards
    (including tests in other modules, where this autouse fixture is not
    active).
    """
    helpers.get_components_graph_cache_key.cache_clear()
    yield
    helpers.get_components_graph_cache_key.cache_clear()
@pytest.fixture
def mock_subprocess_run() -> Generator[Mock, None, None]:
    """Swap subprocess.run for a mock while a test runs.

    Yields the mock standing in for subprocess.run so tests can set its
    return value and inspect how git was invoked; the real function is
    restored when the test finishes.
    """
    patcher = patch("subprocess.run")
    mock_run = patcher.start()
    try:
        yield mock_run
    finally:
        patcher.stop()
def test_cache_key_generation(mock_git_output: str, mock_subprocess_run: Mock) -> None:
    """Verify the key derived from git output has the shape of a SHA256 digest."""
    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    digest = helpers.get_components_graph_cache_key()

    # A SHA256 hex digest is 64 lowercase hexadecimal characters
    assert len(digest) == 64
    assert set(digest) <= set("0123456789abcdef")
def test_cache_key_consistent_for_same_files(
    mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Test that same git output produces same cache key."""
    mock_result = Mock()
    mock_result.stdout = mock_git_output
    mock_subprocess_run.return_value = mock_result

    key1 = helpers.get_components_graph_cache_key()
    # The function is memoized with functools.cache; drop the memo so the
    # second call really re-hashes the git output instead of returning the
    # stored value, otherwise this test could never fail.
    helpers.get_components_graph_cache_key.cache_clear()
    key2 = helpers.get_components_graph_cache_key()

    assert key1 == key2
def test_cache_key_different_for_changed_files(mock_subprocess_run: Mock) -> None:
    """Verify that a changed blob hash in the git output yields a new key.

    Any .py file counts, not just __init__.py: AUTO_LOAD can be defined in
    sensor.py, binary_sensor.py, etc., so a change there must invalidate the
    cache as well.
    """
    before = Mock(
        stdout="100644 abc123... 0 esphome/components/xiaomi_lywsd03mmc/sensor.py\n"
    )
    # Same path, different hash - simulates a change to AUTO_LOAD
    after = Mock(
        stdout="100644 xyz789... 0 esphome/components/xiaomi_lywsd03mmc/sensor.py\n"
    )

    mock_subprocess_run.return_value = before
    key_before = helpers.get_components_graph_cache_key()

    # Drop the memoized value so the second listing is actually hashed
    helpers.get_components_graph_cache_key.cache_clear()

    mock_subprocess_run.return_value = after
    key_after = helpers.get_components_graph_cache_key()

    assert key_before != key_after
def test_cache_key_uses_git_ls_files(
    mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Verify the exact git ls-files invocation used to build the key."""
    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    helpers.get_components_graph_cache_key()

    # Exactly one subprocess call, with the command as first positional arg
    mock_subprocess_run.assert_called_once()
    invocation = mock_subprocess_run.call_args
    expected_cmd = ["git", "ls-files", "-s", "esphome/components/**/*.py"]
    assert invocation.args[0] == expected_cmd

    options = invocation.kwargs
    assert options["capture_output"] is True
    assert options["text"] is True
    assert options["check"] is True
    assert options["close_fds"] is False
def test_cache_hit_returns_cached_graph(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Verify a cache file with matching version and key is returned as-is."""
    expected_graph = {"wifi": ["network"], "api": ["socket"]}
    stored_key = "a" * 64

    # Seed the cache file the function will look for in temp_folder
    payload = json.dumps(
        {
            "_version": helpers.COMPONENTS_GRAPH_CACHE_VERSION,
            "_cache_key": stored_key,
            "graph": expected_graph,
        }
    )
    (tmp_path / "components_graph.json").write_text(payload)

    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    with (
        patch("helpers.get_components_graph_cache_key", return_value=stored_key),
        patch("helpers.temp_folder", str(tmp_path)),
    ):
        graph = helpers.create_components_graph()
        assert graph == expected_graph
def test_cache_miss_no_cache_file(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Verify the graph is rebuilt when there is no cache file to read."""
    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    # Minimal repo layout: an empty esphome/components directory
    (tmp_path / "esphome" / "components").mkdir(parents=True)
    temp_dir = tmp_path / ".temp"

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="test_key"),
    ):
        graph = helpers.create_components_graph()
        # An empty components directory produces an empty graph
        assert graph == {}
def test_cache_miss_version_mismatch(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Verify a cache file written with another version is not used."""
    temp_dir = tmp_path / ".temp"
    temp_dir.mkdir(parents=True)
    stale_payload = json.dumps(
        {
            "_version": 999,  # Wrong version
            "_cache_key": "test_key",
            "graph": {"old": ["data"]},
        }
    )
    (temp_dir / "components_graph.json").write_text(stale_payload)

    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    # Minimal repo layout: an empty esphome/components directory
    (tmp_path / "esphome" / "components").mkdir(parents=True)

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="test_key"),
    ):
        graph = helpers.create_components_graph()
        # The stale {"old": ["data"]} graph must not come back; a rebuild over
        # the empty components directory gives an empty graph
        assert graph == {}
def test_cache_miss_key_mismatch(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Verify a cache file stamped with a different cache key is not used."""
    temp_dir = tmp_path / ".temp"
    temp_dir.mkdir(parents=True)
    stale_payload = json.dumps(
        {
            "_version": helpers.COMPONENTS_GRAPH_CACHE_VERSION,
            "_cache_key": "old_key",
            "graph": {"old": ["data"]},
        }
    )
    (temp_dir / "components_graph.json").write_text(stale_payload)

    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    # Minimal repo layout: an empty esphome/components directory
    (tmp_path / "esphome" / "components").mkdir(parents=True)

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="new_key"),
    ):
        graph = helpers.create_components_graph()
        # The graph stored under "old_key" must be discarded; a rebuild over
        # the empty components directory gives an empty graph
        assert graph == {}
def test_cache_miss_corrupted_json(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Verify an unparsable cache file is ignored and the graph is rebuilt."""
    temp_dir = tmp_path / ".temp"
    temp_dir.mkdir(parents=True)
    (temp_dir / "components_graph.json").write_text("{invalid json")

    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    # Minimal repo layout: an empty esphome/components directory
    (tmp_path / "esphome" / "components").mkdir(parents=True)

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="test_key"),
    ):
        graph = helpers.create_components_graph()
        # The JSON decode failure is tolerated; rebuilding over the empty
        # components directory gives an empty graph
        assert graph == {}