fix: navidrome subsonic api errors incorrectly tripping circuit breaker (#9)

This commit is contained in:
Harvey
2026-04-04 15:32:52 +01:00
committed by GitHub
parent c141b9fcdf
commit c696eae987
10 changed files with 151 additions and 24 deletions
+13 -8
View File
@@ -38,19 +38,24 @@ async def get_navidrome_albums(
genre: str = Query(default=""),
service: NavidromeLibraryService = Depends(get_navidrome_library_service),
) -> NavidromeAlbumPage:
subsonic_type = "byGenre" if genre else _SORT_MAP.get(sort_by, "alphabeticalByName")
try:
if genre:
subsonic_type = "byGenre"
else:
subsonic_type = _SORT_MAP.get(sort_by, "alphabeticalByName")
items = await service.get_albums(type=subsonic_type, size=limit, offset=offset, genre=genre if genre else None)
stats = await service.get_stats()
total = stats.total_albums if len(items) >= limit else offset + len(items)
return NavidromeAlbumPage(items=items, total=total)
items = await service.get_albums(
type=subsonic_type, size=limit, offset=offset, genre=genre if genre else None,
)
except ExternalServiceError as e:
logger.error("Navidrome service error getting albums: %s", e)
raise HTTPException(status_code=502, detail="Failed to communicate with Navidrome")
try:
stats = await service.get_stats()
total = stats.total_albums if len(items) >= limit else offset + len(items)
except ExternalServiceError:
logger.warning("Navidrome stats unavailable, using heuristic pagination total")
total = offset + len(items) + (1 if len(items) >= limit else 0)
return NavidromeAlbumPage(items=items, total=total)
@router.get("/albums/{album_id}", response_model=NavidromeAlbumDetail)
async def get_navidrome_album_detail(
+2 -1
View File
@@ -42,10 +42,11 @@ async def external_service_error_handler(request: Request, exc: ExternalServiceE
async def circuit_open_error_handler(request: Request, exc: CircuitOpenError) -> MsgSpecJSONResponse:
logger.error("Circuit breaker open: %s - %s %s", exc, request.method, request.url.path)
name = exc.breaker_name.replace("_", " ").title() if getattr(exc, "breaker_name", "") else "Service"
return error_response(
status.HTTP_503_SERVICE_UNAVAILABLE,
CIRCUIT_BREAKER_OPEN,
"Service temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover.",
f"{name} is temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover.",
)
+20
View File
@@ -79,5 +79,25 @@ class NavidromeAuthError(NavidromeApiError):
pass
class NavidromeSubsonicError(ExternalServiceError):
"""Non-auth error from a valid Subsonic API envelope.
Raised when Navidrome returns a well-formed ``subsonic-response`` with
a non-OK status and an error code that is *not* an authentication
failure (codes 40/41). These are potentially transient (e.g. "Library
not found or empty" during a rescan) and should be retried but must
**not** trip the circuit breaker.
"""
def __init__(
self,
message: str,
details: Any = None,
code: int | None = None,
):
super().__init__(message, details)
self.code = code
class ClientDisconnectedError(MusicseerrException):
pass
+6 -3
View File
@@ -160,7 +160,9 @@ class CircuitBreaker:
class CircuitOpenError(Exception):
pass
def __init__(self, message: str, breaker_name: str = ""):
super().__init__(message)
self.breaker_name = breaker_name
def _get_retry_after_seconds(exception: Exception) -> Optional[float]:
@@ -206,7 +208,8 @@ def with_retry(
extra={"service_name": service_name, "function": func_name}
)
raise CircuitOpenError(
f"Circuit breaker '{circuit_breaker.name}' is OPEN"
f"Circuit breaker '{circuit_breaker.name}' is OPEN",
breaker_name=circuit_breaker.name,
)
last_exception = None
@@ -293,7 +296,7 @@ def with_retry(
if circuit_breaker and last_exception:
is_non_breaking = isinstance(last_exception, non_breaking_exceptions) if non_breaking_exceptions else False
if not is_non_breaking or circuit_breaker.state == CircuitState.HALF_OPEN:
if not is_non_breaking:
await circuit_breaker.arecord_failure()
raise last_exception
+2 -2
View File
@@ -6,7 +6,7 @@ from typing import Any
import msgspec
from core.exceptions import NavidromeApiError, NavidromeAuthError
from core.exceptions import NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
logger = logging.getLogger(__name__)
@@ -88,7 +88,7 @@ def parse_subsonic_response(data: dict[str, Any]) -> dict[str, Any]:
message = error.get("message", "Unknown Subsonic API error")
if code in (40, 41):
raise NavidromeAuthError(message, code=code)
raise NavidromeApiError(message, code=code)
raise NavidromeSubsonicError(message, code=code)
return resp
+2 -1
View File
@@ -9,7 +9,7 @@ from urllib.parse import urlencode
import httpx
import msgspec
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
from infrastructure.cache.cache_keys import NAVIDROME_PREFIX
from infrastructure.cache.memory_cache import CacheInterface
from infrastructure.resilience.retry import with_retry, CircuitBreaker
@@ -210,6 +210,7 @@ class NavidromeRepository:
max_delay=5.0,
circuit_breaker=_navidrome_circuit_breaker,
retriable_exceptions=(httpx.HTTPError, ExternalServiceError),
non_breaking_exceptions=(NavidromeSubsonicError,),
)
async def _request(
self,
@@ -144,8 +144,8 @@ async def test_circuit_still_opens_for_real_errors_amid_rate_limits():
@pytest.mark.asyncio
async def test_non_breaking_in_half_open_reopens_circuit():
"""Non-breaking exceptions in HALF_OPEN must still reopen the circuit."""
async def test_non_breaking_in_half_open_stays_half_open():
"""Non-breaking exceptions in HALF_OPEN keep the circuit HALF_OPEN (service is reachable)."""
cb = CircuitBreaker(failure_threshold=2, success_threshold=2, timeout=0.01, name="test-half-open")
for _ in range(2):
@@ -170,7 +170,7 @@ async def test_non_breaking_in_half_open_reopens_circuit():
with pytest.raises(_RateLimited):
await rate_limited_in_half_open()
assert cb.state == CircuitState.OPEN
assert cb.state == CircuitState.HALF_OPEN
@pytest.mark.asyncio
@@ -6,7 +6,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
from repositories.navidrome_repository import _navidrome_circuit_breaker
from repositories.navidrome_models import (
SubsonicAlbum,
@@ -86,14 +86,14 @@ class TestParseSubsonicResponse:
resp = parse_subsonic_response(data)
assert resp["status"] == "ok"
def test_error_status_raises_api_error(self):
def test_error_status_raises_subsonic_error(self):
data = {
"subsonic-response": {
"status": "failed",
"error": {"code": 70, "message": "Not found"},
}
}
with pytest.raises(NavidromeApiError, match="Not found"):
with pytest.raises(NavidromeSubsonicError, match="Not found"):
parse_subsonic_response(data)
def test_auth_error_code_40(self):
@@ -333,6 +333,63 @@ class TestErrorHandling:
await repo._request("/rest/ping")
class TestCircuitBreakerNonBreaking:
"""Verify NavidromeSubsonicError doesn't trip the circuit breaker."""
def setup_method(self):
_navidrome_circuit_breaker.reset()
@pytest.mark.asyncio
async def test_subsonic_error_does_not_open_circuit_breaker(self):
"""Repeated SubsonicErrors (e.g. 'Library not found') should NOT open the CB."""
repo, client, _ = _make_repo()
error_envelope = {
"subsonic-response": {
"status": "failed",
"error": {"code": 70, "message": "Library not found or empty"},
}
}
client.get = AsyncMock(return_value=_mock_response(error_envelope))
with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
for _ in range(10):
with pytest.raises(NavidromeSubsonicError):
await repo._request("/rest/getAlbumList2")
from infrastructure.resilience.retry import CircuitState
assert _navidrome_circuit_breaker.state == CircuitState.CLOSED
@pytest.mark.asyncio
async def test_auth_error_still_trips_circuit_breaker(self):
"""Auth errors (NavidromeAuthError) should still record CB failures."""
from infrastructure.resilience.retry import CircuitOpenError, CircuitState
repo, client, _ = _make_repo()
client.get = AsyncMock(return_value=_mock_response({}, status_code=401))
with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
for _ in range(10):
with pytest.raises((NavidromeAuthError, ExternalServiceError, CircuitOpenError)):
await repo._request("/rest/ping")
assert _navidrome_circuit_breaker.state == CircuitState.OPEN
@pytest.mark.asyncio
async def test_connection_error_still_trips_circuit_breaker(self):
"""Transport-level errors should still record CB failures."""
from infrastructure.resilience.retry import CircuitOpenError, CircuitState
repo, client, _ = _make_repo()
client.get = AsyncMock(side_effect=httpx.ConnectError("refused"))
with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
for _ in range(10):
with pytest.raises((ExternalServiceError, CircuitOpenError)):
await repo._request("/rest/ping")
assert _navidrome_circuit_breaker.state == CircuitState.OPEN
class TestValidateConnection:
def setup_method(self):
_navidrome_circuit_breaker.reset()
@@ -91,6 +91,25 @@ class TestLibraryAlbums:
assert data["items"][0]["navidrome_id"] == "a1"
assert data["total"] == 1
def test_get_albums_stats_fallback(self, library_client, mock_library_service):
"""When stats fails, albums still returns with heuristic total."""
mock_library_service.get_albums = AsyncMock(return_value=[_album_summary(id=f"a{i}") for i in range(48)])
mock_library_service.get_stats = AsyncMock(side_effect=ExternalServiceError("Library not found"))
resp = library_client.get("/navidrome/albums?limit=48")
assert resp.status_code == 200
data = resp.json()
assert len(data["items"]) == 48
assert data["total"] == 49 # offset(0) + 48 + 1 (full page heuristic)
def test_get_albums_stats_fallback_partial_page(self, library_client, mock_library_service):
"""Partial page + stats failure → total = offset + len(items), no +1."""
mock_library_service.get_albums = AsyncMock(return_value=[_album_summary(id=f"a{i}") for i in range(5)])
mock_library_service.get_stats = AsyncMock(side_effect=ExternalServiceError("down"))
resp = library_client.get("/navidrome/albums?limit=48")
assert resp.status_code == 200
data = resp.json()
assert data["total"] == 5
def test_get_album_detail(self, library_client):
resp = library_client.get("/navidrome/albums/a1")
assert resp.status_code == 200
+24 -3
View File
@@ -26,7 +26,10 @@ def _build_app() -> FastAPI:
@app.get("/raise-circuit")
async def raise_circuit():
raise CircuitOpenError("JellyfinRepository after 5 failures")
raise CircuitOpenError(
"Circuit breaker 'jellyfin' is OPEN",
breaker_name="jellyfin",
)
app.add_exception_handler(ExternalServiceError, external_service_error_handler)
app.add_exception_handler(CircuitOpenError, circuit_open_error_handler)
@@ -70,5 +73,23 @@ async def test_circuit_open_error_hides_details():
body = resp.json()
assert resp.status_code == 503
assert body["error"]["message"] == "Service temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover."
assert "JellyfinRepository" not in resp.text
assert body["error"]["message"] == "Jellyfin is temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover."
assert "circuit breaker" not in resp.text.lower() or "CIRCUIT_BREAKER_OPEN" in resp.text
@pytest.mark.asyncio
async def test_circuit_open_error_without_breaker_name_falls_back():
app = FastAPI()
@app.get("/raise-circuit-unnamed")
async def raise_circuit_unnamed():
raise CircuitOpenError("CB tripped")
app.add_exception_handler(CircuitOpenError, circuit_open_error_handler)
transport = httpx.ASGITransport(app=app, raise_app_exceptions=False)
async with httpx.AsyncClient(transport=transport, base_url="http://test") as client:
resp = await client.get("/raise-circuit-unnamed")
body = resp.json()
assert resp.status_code == 503
assert body["error"]["message"].startswith("Service is temporarily unavailable")