fix: navidrome subsonic api errors incorrectly tripping circuit breaker (#9)

This commit is contained in:
Harvey
2026-04-04 15:32:52 +01:00
committed by GitHub
parent c141b9fcdf
commit c696eae987
10 changed files with 151 additions and 24 deletions
+13 -8
View File
@@ -38,19 +38,24 @@ async def get_navidrome_albums(
genre: str = Query(default=""), genre: str = Query(default=""),
service: NavidromeLibraryService = Depends(get_navidrome_library_service), service: NavidromeLibraryService = Depends(get_navidrome_library_service),
) -> NavidromeAlbumPage: ) -> NavidromeAlbumPage:
subsonic_type = "byGenre" if genre else _SORT_MAP.get(sort_by, "alphabeticalByName")
try: try:
if genre: items = await service.get_albums(
subsonic_type = "byGenre" type=subsonic_type, size=limit, offset=offset, genre=genre if genre else None,
else: )
subsonic_type = _SORT_MAP.get(sort_by, "alphabeticalByName")
items = await service.get_albums(type=subsonic_type, size=limit, offset=offset, genre=genre if genre else None)
stats = await service.get_stats()
total = stats.total_albums if len(items) >= limit else offset + len(items)
return NavidromeAlbumPage(items=items, total=total)
except ExternalServiceError as e: except ExternalServiceError as e:
logger.error("Navidrome service error getting albums: %s", e) logger.error("Navidrome service error getting albums: %s", e)
raise HTTPException(status_code=502, detail="Failed to communicate with Navidrome") raise HTTPException(status_code=502, detail="Failed to communicate with Navidrome")
try:
stats = await service.get_stats()
total = stats.total_albums if len(items) >= limit else offset + len(items)
except ExternalServiceError:
logger.warning("Navidrome stats unavailable, using heuristic pagination total")
total = offset + len(items) + (1 if len(items) >= limit else 0)
return NavidromeAlbumPage(items=items, total=total)
@router.get("/albums/{album_id}", response_model=NavidromeAlbumDetail) @router.get("/albums/{album_id}", response_model=NavidromeAlbumDetail)
async def get_navidrome_album_detail( async def get_navidrome_album_detail(
+2 -1
View File
@@ -42,10 +42,11 @@ async def external_service_error_handler(request: Request, exc: ExternalServiceE
async def circuit_open_error_handler(request: Request, exc: CircuitOpenError) -> MsgSpecJSONResponse: async def circuit_open_error_handler(request: Request, exc: CircuitOpenError) -> MsgSpecJSONResponse:
logger.error("Circuit breaker open: %s - %s %s", exc, request.method, request.url.path) logger.error("Circuit breaker open: %s - %s %s", exc, request.method, request.url.path)
name = exc.breaker_name.replace("_", " ").title() if getattr(exc, "breaker_name", "") else "Service"
return error_response( return error_response(
status.HTTP_503_SERVICE_UNAVAILABLE, status.HTTP_503_SERVICE_UNAVAILABLE,
CIRCUIT_BREAKER_OPEN, CIRCUIT_BREAKER_OPEN,
"Service temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover.", f"{name} is temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover.",
) )
+20
View File
@@ -79,5 +79,25 @@ class NavidromeAuthError(NavidromeApiError):
pass pass
class NavidromeSubsonicError(ExternalServiceError):
    """Subsonic-level failure that is not an authentication problem.

    Used when Navidrome answers with a well-formed ``subsonic-response``
    envelope whose status is not OK and whose error code is anything other
    than the authentication codes (40/41). Such failures may be transient
    (for example "Library not found or empty" while a rescan is running),
    so callers should retry them, but they must **not** count towards
    opening the circuit breaker.

    Attributes:
        code: The Subsonic error code from the envelope, or ``None`` when
            no code was supplied.
    """

    def __init__(
        self,
        message: str,
        details: Any = None,
        code: int | None = None,
    ):
        super().__init__(message, details)
        # Keep the numeric Subsonic code available to callers.
        self.code = code
class ClientDisconnectedError(MusicseerrException): class ClientDisconnectedError(MusicseerrException):
pass pass
+6 -3
View File
@@ -160,7 +160,9 @@ class CircuitBreaker:
class CircuitOpenError(Exception): class CircuitOpenError(Exception):
pass def __init__(self, message: str, breaker_name: str = ""):
super().__init__(message)
self.breaker_name = breaker_name
def _get_retry_after_seconds(exception: Exception) -> Optional[float]: def _get_retry_after_seconds(exception: Exception) -> Optional[float]:
@@ -206,7 +208,8 @@ def with_retry(
extra={"service_name": service_name, "function": func_name} extra={"service_name": service_name, "function": func_name}
) )
raise CircuitOpenError( raise CircuitOpenError(
f"Circuit breaker '{circuit_breaker.name}' is OPEN" f"Circuit breaker '{circuit_breaker.name}' is OPEN",
breaker_name=circuit_breaker.name,
) )
last_exception = None last_exception = None
@@ -293,7 +296,7 @@ def with_retry(
if circuit_breaker and last_exception: if circuit_breaker and last_exception:
is_non_breaking = isinstance(last_exception, non_breaking_exceptions) if non_breaking_exceptions else False is_non_breaking = isinstance(last_exception, non_breaking_exceptions) if non_breaking_exceptions else False
if not is_non_breaking or circuit_breaker.state == CircuitState.HALF_OPEN: if not is_non_breaking:
await circuit_breaker.arecord_failure() await circuit_breaker.arecord_failure()
raise last_exception raise last_exception
+2 -2
View File
@@ -6,7 +6,7 @@ from typing import Any
import msgspec import msgspec
from core.exceptions import NavidromeApiError, NavidromeAuthError from core.exceptions import NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -88,7 +88,7 @@ def parse_subsonic_response(data: dict[str, Any]) -> dict[str, Any]:
message = error.get("message", "Unknown Subsonic API error") message = error.get("message", "Unknown Subsonic API error")
if code in (40, 41): if code in (40, 41):
raise NavidromeAuthError(message, code=code) raise NavidromeAuthError(message, code=code)
raise NavidromeApiError(message, code=code) raise NavidromeSubsonicError(message, code=code)
return resp return resp
+2 -1
View File
@@ -9,7 +9,7 @@ from urllib.parse import urlencode
import httpx import httpx
import msgspec import msgspec
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
from infrastructure.cache.cache_keys import NAVIDROME_PREFIX from infrastructure.cache.cache_keys import NAVIDROME_PREFIX
from infrastructure.cache.memory_cache import CacheInterface from infrastructure.cache.memory_cache import CacheInterface
from infrastructure.resilience.retry import with_retry, CircuitBreaker from infrastructure.resilience.retry import with_retry, CircuitBreaker
@@ -210,6 +210,7 @@ class NavidromeRepository:
max_delay=5.0, max_delay=5.0,
circuit_breaker=_navidrome_circuit_breaker, circuit_breaker=_navidrome_circuit_breaker,
retriable_exceptions=(httpx.HTTPError, ExternalServiceError), retriable_exceptions=(httpx.HTTPError, ExternalServiceError),
non_breaking_exceptions=(NavidromeSubsonicError,),
) )
async def _request( async def _request(
self, self,
@@ -144,8 +144,8 @@ async def test_circuit_still_opens_for_real_errors_amid_rate_limits():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_non_breaking_in_half_open_reopens_circuit(): async def test_non_breaking_in_half_open_stays_half_open():
"""Non-breaking exceptions in HALF_OPEN must still reopen the circuit.""" """Non-breaking exceptions in HALF_OPEN keep the circuit HALF_OPEN (service is reachable)."""
cb = CircuitBreaker(failure_threshold=2, success_threshold=2, timeout=0.01, name="test-half-open") cb = CircuitBreaker(failure_threshold=2, success_threshold=2, timeout=0.01, name="test-half-open")
for _ in range(2): for _ in range(2):
@@ -170,7 +170,7 @@ async def test_non_breaking_in_half_open_reopens_circuit():
with pytest.raises(_RateLimited): with pytest.raises(_RateLimited):
await rate_limited_in_half_open() await rate_limited_in_half_open()
assert cb.state == CircuitState.OPEN assert cb.state == CircuitState.HALF_OPEN
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -6,7 +6,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import httpx import httpx
import pytest import pytest
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
from repositories.navidrome_repository import _navidrome_circuit_breaker from repositories.navidrome_repository import _navidrome_circuit_breaker
from repositories.navidrome_models import ( from repositories.navidrome_models import (
SubsonicAlbum, SubsonicAlbum,
@@ -86,14 +86,14 @@ class TestParseSubsonicResponse:
resp = parse_subsonic_response(data) resp = parse_subsonic_response(data)
assert resp["status"] == "ok" assert resp["status"] == "ok"
def test_error_status_raises_api_error(self): def test_error_status_raises_subsonic_error(self):
data = { data = {
"subsonic-response": { "subsonic-response": {
"status": "failed", "status": "failed",
"error": {"code": 70, "message": "Not found"}, "error": {"code": 70, "message": "Not found"},
} }
} }
with pytest.raises(NavidromeApiError, match="Not found"): with pytest.raises(NavidromeSubsonicError, match="Not found"):
parse_subsonic_response(data) parse_subsonic_response(data)
def test_auth_error_code_40(self): def test_auth_error_code_40(self):
@@ -333,6 +333,63 @@ class TestErrorHandling:
await repo._request("/rest/ping") await repo._request("/rest/ping")
class TestCircuitBreakerNonBreaking:
    """Circuit-breaker outcome of ``_request`` for each failure category.

    Subsonic envelope errors must leave the breaker CLOSED, whereas auth
    failures and transport failures must still drive it to OPEN.
    """

    def setup_method(self):
        # The breaker object is shared with the repository module, so every
        # test starts by resetting it.
        _navidrome_circuit_breaker.reset()

    @pytest.mark.asyncio
    async def test_subsonic_error_does_not_open_circuit_breaker(self):
        """Ten consecutive Subsonic code-70 failures leave the breaker CLOSED."""
        from infrastructure.resilience.retry import CircuitState

        repo, client, _ = _make_repo()
        failure = {"code": 70, "message": "Library not found or empty"}
        envelope = {"subsonic-response": {"status": "failed", "error": failure}}
        client.get = AsyncMock(return_value=_mock_response(envelope))

        # asyncio.sleep inside the retry module is swapped for an AsyncMock,
        # presumably so retry back-off does not slow the test down.
        with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
            for _attempt in range(10):
                with pytest.raises(NavidromeSubsonicError):
                    await repo._request("/rest/getAlbumList2")

        assert _navidrome_circuit_breaker.state == CircuitState.CLOSED

    @pytest.mark.asyncio
    async def test_auth_error_still_trips_circuit_breaker(self):
        """Repeated HTTP 401 responses end with the breaker OPEN."""
        from infrastructure.resilience.retry import CircuitOpenError, CircuitState

        repo, client, _ = _make_repo()
        client.get = AsyncMock(return_value=_mock_response({}, status_code=401))
        # Once the breaker opens, later calls fail with CircuitOpenError
        # instead of the underlying error, so any of the three is accepted.
        acceptable = (NavidromeAuthError, ExternalServiceError, CircuitOpenError)

        with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
            for _attempt in range(10):
                with pytest.raises(acceptable):
                    await repo._request("/rest/ping")

        assert _navidrome_circuit_breaker.state == CircuitState.OPEN

    @pytest.mark.asyncio
    async def test_connection_error_still_trips_circuit_breaker(self):
        """Repeated transport-level connect errors end with the breaker OPEN."""
        from infrastructure.resilience.retry import CircuitOpenError, CircuitState

        repo, client, _ = _make_repo()
        client.get = AsyncMock(side_effect=httpx.ConnectError("refused"))
        acceptable = (ExternalServiceError, CircuitOpenError)

        with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
            for _attempt in range(10):
                with pytest.raises(acceptable):
                    await repo._request("/rest/ping")

        assert _navidrome_circuit_breaker.state == CircuitState.OPEN
class TestValidateConnection: class TestValidateConnection:
def setup_method(self): def setup_method(self):
_navidrome_circuit_breaker.reset() _navidrome_circuit_breaker.reset()
@@ -91,6 +91,25 @@ class TestLibraryAlbums:
assert data["items"][0]["navidrome_id"] == "a1" assert data["items"][0]["navidrome_id"] == "a1"
assert data["total"] == 1 assert data["total"] == 1
def test_get_albums_stats_fallback(self, library_client, mock_library_service):
    """A full page is still served, with a heuristic total, when stats fails."""
    full_page = [_album_summary(id=f"a{idx}") for idx in range(48)]
    stats_failure = ExternalServiceError("Library not found")
    mock_library_service.get_albums = AsyncMock(return_value=full_page)
    mock_library_service.get_stats = AsyncMock(side_effect=stats_failure)

    resp = library_client.get("/navidrome/albums?limit=48")

    assert resp.status_code == 200
    payload = resp.json()
    assert len(payload["items"]) == 48
    # Full page without stats: offset (0) + 48 items + 1 from the full-page heuristic.
    assert payload["total"] == 49
def test_get_albums_stats_fallback_partial_page(self, library_client, mock_library_service):
    """With a partial page and failing stats, total is offset + len(items) with no +1."""
    short_page = [_album_summary(id=f"a{idx}") for idx in range(5)]
    stats_failure = ExternalServiceError("down")
    mock_library_service.get_albums = AsyncMock(return_value=short_page)
    mock_library_service.get_stats = AsyncMock(side_effect=stats_failure)

    resp = library_client.get("/navidrome/albums?limit=48")

    assert resp.status_code == 200
    payload = resp.json()
    # Five items against a limit of 48 is a partial page, so nothing is added.
    assert payload["total"] == 5
def test_get_album_detail(self, library_client): def test_get_album_detail(self, library_client):
resp = library_client.get("/navidrome/albums/a1") resp = library_client.get("/navidrome/albums/a1")
assert resp.status_code == 200 assert resp.status_code == 200
+24 -3
View File
@@ -26,7 +26,10 @@ def _build_app() -> FastAPI:
@app.get("/raise-circuit") @app.get("/raise-circuit")
async def raise_circuit(): async def raise_circuit():
raise CircuitOpenError("JellyfinRepository after 5 failures") raise CircuitOpenError(
"Circuit breaker 'jellyfin' is OPEN",
breaker_name="jellyfin",
)
app.add_exception_handler(ExternalServiceError, external_service_error_handler) app.add_exception_handler(ExternalServiceError, external_service_error_handler)
app.add_exception_handler(CircuitOpenError, circuit_open_error_handler) app.add_exception_handler(CircuitOpenError, circuit_open_error_handler)
@@ -70,5 +73,23 @@ async def test_circuit_open_error_hides_details():
body = resp.json() body = resp.json()
assert resp.status_code == 503 assert resp.status_code == 503
assert body["error"]["message"] == "Service temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover." assert body["error"]["message"] == "Jellyfin is temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover."
assert "JellyfinRepository" not in resp.text assert "circuit breaker" not in resp.text.lower() or "CIRCUIT_BREAKER_OPEN" in resp.text
@pytest.mark.asyncio
async def test_circuit_open_error_without_breaker_name_falls_back():
    """A CircuitOpenError raised without a breaker name yields the generic "Service" wording."""
    app = FastAPI()

    @app.get("/raise-circuit-unnamed")
    async def raise_circuit_unnamed():
        # Only the message is passed; no breaker_name is supplied.
        raise CircuitOpenError("CB tripped")

    app.add_exception_handler(CircuitOpenError, circuit_open_error_handler)

    asgi = httpx.ASGITransport(app=app, raise_app_exceptions=False)
    async with httpx.AsyncClient(transport=asgi, base_url="http://test") as http:
        resp = await http.get("/raise-circuit-unnamed")

    payload = resp.json()
    assert resp.status_code == 503
    assert payload["error"]["message"].startswith("Service is temporarily unavailable")