From 6855adc0af2e33083299454a87a72fafb8cce28e Mon Sep 17 00:00:00 2001 From: Harvey <64276030+HabiRabbu@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:21:30 +0100 Subject: [PATCH] hotfix: prevent duplicate artist discovery precache execution (#23) --- Makefile | 2 +- backend/core/tasks.py | 2 +- backend/repositories/musicbrainz_album.py | 6 +- backend/services/artist_discovery_service.py | 29 ++- .../services/test_discovery_precache_lock.py | 192 ++++++++++++++++++ .../test_discovery_precache_progress.py | 15 +- 6 files changed, 235 insertions(+), 11 deletions(-) create mode 100644 backend/tests/services/test_discovery_precache_lock.py diff --git a/Makefile b/Makefile index 2fda2aa..a3c9b6d 100644 --- a/Makefile +++ b/Makefile @@ -80,7 +80,7 @@ backend-test-infra-hardening: $(BACKEND_VENV_STAMP) ## Run infrastructure harden cd "$(BACKEND_DIR)" && .venv/bin/python -m pytest tests/infrastructure/test_circuit_breaker_sync.py tests/infrastructure/test_disk_cache_periodic.py tests/infrastructure/test_retry_non_breaking.py backend-test-discovery-precache: $(BACKEND_VENV_STAMP) ## Run artist discovery precache tests - cd "$(BACKEND_DIR)" && .venv/bin/python -m pytest tests/services/test_discovery_precache_progress.py tests/infrastructure/test_retry_non_breaking.py -v + cd "$(BACKEND_DIR)" && .venv/bin/python -m pytest tests/services/test_discovery_precache_progress.py tests/services/test_discovery_precache_lock.py tests/infrastructure/test_retry_non_breaking.py -v backend-test-library-pagination: $(BACKEND_VENV_STAMP) ## Run library pagination tests cd "$(BACKEND_DIR)" && .venv/bin/python -m pytest tests/infrastructure/test_library_pagination.py -v diff --git a/backend/core/tasks.py b/backend/core/tasks.py index e0fc8b1..37fba85 100644 --- a/backend/core/tasks.py +++ b/backend/core/tasks.py @@ -376,7 +376,7 @@ async def warm_artist_discovery_cache_periodically( interval: int = 14400, delay: float = 0.5, ) -> None: - await asyncio.sleep(60) + await asyncio.sleep(300) # Wait for Phase 1.5 sync to finish first while True: try: diff --git a/backend/repositories/musicbrainz_album.py b/backend/repositories/musicbrainz_album.py index 820afba..b9cad1e 100644 --- a/backend/repositories/musicbrainz_album.py +++ b/backend/repositories/musicbrainz_album.py @@ -288,7 +288,7 @@ class MusicBrainzAlbumMixin: cache_key = f"{MB_RELEASE_TO_RG_PREFIX}{release_id}" cached = await self._cache.get(cache_key) if cached is not None: - logger.info(f"[MB] Cache hit for release {release_id[:8]}: '{cached[:8] if cached else 'empty'}'") + logger.debug(f"[MB] Cache hit for release {release_id[:8]}: '{cached[:8] if cached else 'empty'}'") return cached if cached != "" else None dedupe_key = f"{MB_RELEASE_TO_RG_PREFIX}{release_id}" @@ -303,7 +303,7 @@ class MusicBrainzAlbumMixin: cache_key: str, ) -> str | None: try: - logger.info(f"[MB] Fetching release group for release {release_id[:8]}") + logger.debug(f"[MB] Fetching release group for release {release_id[:8]}") result = await mb_api_get( f"/release/{release_id}", params={"inc": "release-groups+recordings"}, @@ -312,7 +312,7 @@ class MusicBrainzAlbumMixin: ) rg = result.release_group rg_id = rg.get("id") - logger.info(f"[MB] Resolved release {release_id[:8]} -> release_group {rg_id}") + logger.debug(f"[MB] Resolved release {release_id[:8]} -> release_group {rg_id}") await self._cache.set(cache_key, rg_id or "", ttl_seconds=86400) positions: dict[str, list[int]] = {} diff --git a/backend/services/artist_discovery_service.py b/backend/services/artist_discovery_service.py index 8af71fa..abaaeba 100644 --- a/backend/services/artist_discovery_service.py +++ b/backend/services/artist_discovery_service.py @@ -26,6 +26,9 @@ DEFAULT_SIMILAR_COUNT = 15 DEFAULT_TOP_SONGS_COUNT = 10 DEFAULT_TOP_ALBUMS_COUNT = 10 +# Module-level flag survives singleton cache invalidation / instance recreation +_discovery_precache_running = False + class ArtistDiscoveryService: def __init__( @@ -406,6 +409,27 @@ class ArtistDiscoveryService: delay: float = 0.5, status_service: Any = None, mbid_to_name: dict[str, str] | None = None, + ) -> int: + global _discovery_precache_running + if _discovery_precache_running: + logger.info("Discovery precache already running, skipping duplicate invocation") + return 0 + + _discovery_precache_running = True + try: + return await self._do_precache_artist_discovery( + artist_mbids, delay=delay, + status_service=status_service, mbid_to_name=mbid_to_name, + ) + finally: + _discovery_precache_running = False + + async def _do_precache_artist_discovery( + self, + artist_mbids: list[str], + delay: float = 0.5, + status_service: Any = None, + mbid_to_name: dict[str, str] | None = None, ) -> int: sources: list[Literal["listenbrainz", "lastfm"]] = [] if self._lb_repo.is_configured(): @@ -469,8 +493,9 @@ class ArtistDiscoveryService: async with counter_lock: source_fetches += 1 - if delay > 0: - await asyncio.sleep(delay) + # Sleep inside semaphore to hold slot and throttle API calls + if delay > 0: + await asyncio.sleep(delay) async with counter_lock: cached_count += 1 diff --git a/backend/tests/services/test_discovery_precache_lock.py b/backend/tests/services/test_discovery_precache_lock.py new file mode 100644 index 0000000..6186276 --- /dev/null +++ b/backend/tests/services/test_discovery_precache_lock.py @@ -0,0 +1,192 @@ +"""Tests for discovery precache double-execution prevention and throttling.""" + +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from services.artist_discovery_service import ArtistDiscoveryService +import services.artist_discovery_service as _ads_module + + +@pytest.fixture(autouse=True) +def _reset_precache_flag(): + _ads_module._discovery_precache_running = False + yield + _ads_module._discovery_precache_running = False + + +def _make_service(*, lb_configured: bool = True, lastfm_enabled: bool = False): + lb_repo = MagicMock() + lb_repo.is_configured.return_value = lb_configured + + lastfm_repo = MagicMock() if lastfm_enabled else None + prefs = MagicMock() + prefs.is_lastfm_enabled.return_value = lastfm_enabled + advanced = MagicMock() + advanced.artist_discovery_precache_concurrency = 2 + prefs.get_advanced_settings.return_value = advanced + + cache = AsyncMock() + cache.get = AsyncMock(return_value=None) + cache.set = AsyncMock() + + library_db = AsyncMock() + library_db.get_all_artist_mbids = AsyncMock(return_value=set()) + + svc = ArtistDiscoveryService( + listenbrainz_repo=lb_repo, + musicbrainz_repo=MagicMock(), + library_db=library_db, + lidarr_repo=MagicMock(), + memory_cache=cache, + lastfm_repo=lastfm_repo, + preferences_service=prefs, + ) + return svc + + +@pytest.mark.asyncio +async def test_duplicate_invocation_skipped(): + """Second call returns 0 immediately when precache is already running.""" + svc = _make_service() + + gate = asyncio.Event() + + async def slow_similar(*args, **kwargs): + await gate.wait() + return MagicMock() + + with ( + patch.object(svc, "get_similar_artists", new_callable=AsyncMock, side_effect=slow_similar), + patch.object(svc, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()), + patch.object(svc, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()), + ): + task1 = asyncio.create_task( + svc.precache_artist_discovery(["mbid-a"], delay=0) + ) + await asyncio.sleep(0.01) + + assert _ads_module._discovery_precache_running is True + result2 = await svc.precache_artist_discovery(["mbid-b"], delay=0) + assert result2 == 0 + + gate.set() + result1 = await task1 + assert result1 >= 0 + + assert _ads_module._discovery_precache_running is False + + +@pytest.mark.asyncio +async def test_lock_released_after_exception(): + """Flag is cleared even when precache raises an unexpected error.""" + svc = _make_service() + + with patch.object( + svc, "_do_precache_artist_discovery", + new_callable=AsyncMock, + side_effect=RuntimeError("boom"), + ): + with pytest.raises(RuntimeError, match="boom"): + await svc.precache_artist_discovery(["mbid-a"], delay=0) + + assert _ads_module._discovery_precache_running is False + + with ( + patch.object(svc, "get_similar_artists", new_callable=AsyncMock, return_value=MagicMock()), + patch.object(svc, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()), + patch.object(svc, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()), + ): + result = await svc.precache_artist_discovery(["mbid-a"], delay=0) + assert result >= 0 + + +@pytest.mark.asyncio +async def test_delay_holds_semaphore_slot(): + """Delay is applied inside the semaphore, blocking other artists from starting.""" + svc = _make_service() + timestamps: list[float] = [] + + loop = asyncio.get_event_loop() + + async def track_similar(*args, **kwargs): + timestamps.append(loop.time()) + return MagicMock() + + with ( + patch.object(svc, "get_similar_artists", new_callable=AsyncMock, side_effect=track_similar), + patch.object(svc, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()), + patch.object(svc, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()), + ): + await svc.precache_artist_discovery( + ["mbid-a", "mbid-b", "mbid-c", "mbid-d"], + delay=0.15, + ) + + assert len(timestamps) == 4 + # With concurrency=2 and delay=0.15s inside semaphore, the 3rd artist + # cannot start until one of the first two finishes its delay. + # The gap between the 2nd and 3rd timestamps should be >= delay. + sorted_ts = sorted(timestamps) + gap = sorted_ts[2] - sorted_ts[0] + assert gap >= 0.1, f"Expected >=0.1s gap due to semaphore-held delay, got {gap:.3f}s" + + +@pytest.mark.asyncio +async def test_cached_artists_skip_api_calls(): + """Artists with all cache keys populated skip API fetches entirely.""" + svc = _make_service() + + call_count = 0 + + async def counting_similar(*args, **kwargs): + nonlocal call_count + call_count += 1 + return MagicMock() + + svc._cache.get = AsyncMock(return_value=MagicMock()) + + with ( + patch.object(svc, "get_similar_artists", new_callable=AsyncMock, side_effect=counting_similar), + patch.object(svc, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()), + patch.object(svc, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()), + ): + result = await svc.precache_artist_discovery( + ["mbid-a", "mbid-b"], + delay=0, + ) + + assert call_count == 0, "Expected no API calls when all cache keys are populated" + assert result == 2 + + +@pytest.mark.asyncio +async def test_guard_survives_instance_recreation(): + """Module-level flag prevents overlap even when a new service instance is created.""" + svc1 = _make_service() + svc2 = _make_service() + + gate = asyncio.Event() + + async def slow_similar(*args, **kwargs): + await gate.wait() + return MagicMock() + + with ( + patch.object(svc1, "get_similar_artists", new_callable=AsyncMock, side_effect=slow_similar), + patch.object(svc1, "get_top_songs", new_callable=AsyncMock, return_value=MagicMock()), + patch.object(svc1, "get_top_albums", new_callable=AsyncMock, return_value=MagicMock()), + ): + task1 = asyncio.create_task( + svc1.precache_artist_discovery(["mbid-a"], delay=0) + ) + await asyncio.sleep(0.01) + + result2 = await svc2.precache_artist_discovery(["mbid-b"], delay=0) + assert result2 == 0, "Second instance should be blocked by module-level flag" + + gate.set() + await task1 + + assert _ads_module._discovery_precache_running is False diff --git a/backend/tests/services/test_discovery_precache_progress.py b/backend/tests/services/test_discovery_precache_progress.py index 5cf7840..566f496 100644 --- a/backend/tests/services/test_discovery_precache_progress.py +++ b/backend/tests/services/test_discovery_precache_progress.py @@ -15,6 +15,9 @@ def _make_service(*, lb_configured: bool = True, lastfm_enabled: bool = False): lastfm_repo = MagicMock() if lastfm_enabled else None prefs = MagicMock() prefs.is_lastfm_enabled.return_value = lastfm_enabled + advanced = MagicMock() + advanced.artist_discovery_precache_concurrency = 3 + prefs.get_advanced_settings.return_value = advanced cache = AsyncMock() cache.get = AsyncMock(return_value=None) @@ -38,8 +41,9 @@ def _make_service(*, lb_configured: bool = True, lastfm_enabled: bool = False): @pytest.mark.asyncio async def test_progress_updates_on_success(): svc = _make_service() - status = AsyncMock() + status = MagicMock() status.update_progress = AsyncMock() + status.is_cancelled = MagicMock(return_value=False) with ( patch.object(svc, "get_similar_artists", new_callable=AsyncMock, return_value=MagicMock()), @@ -61,8 +65,9 @@ async def test_progress_updates_on_success(): @pytest.mark.asyncio async def test_progress_updates_even_on_failure(): svc = _make_service() - status = AsyncMock() + status = MagicMock() status.update_progress = AsyncMock() + status.is_cancelled = MagicMock(return_value=False) with ( patch.object(svc, "get_similar_artists", new_callable=AsyncMock, side_effect=RuntimeError("boom")), @@ -85,8 +90,9 @@ async def test_progress_updates_even_on_failure(): @pytest.mark.asyncio async def test_progress_updates_on_mixed_success_and_failure(): svc = _make_service() - status = AsyncMock() + status = MagicMock() status.update_progress = AsyncMock() + status.is_cancelled = MagicMock(return_value=False) call_count = 0 @@ -114,8 +120,9 @@ async def test_progress_updates_on_mixed_success_and_failure(): @pytest.mark.asyncio async def test_cached_artists_still_update_progress(): svc = _make_service() - status = AsyncMock() + status = MagicMock() status.update_progress = AsyncMock() + status.is_cancelled = MagicMock(return_value=False) svc._cache.get = AsyncMock(return_value="cached-value")