fix: navidrome subsonic api errors incorrectly tripping circuit breaker (#9)
This commit is contained in:
@@ -38,19 +38,24 @@ async def get_navidrome_albums(
|
||||
genre: str = Query(default=""),
|
||||
service: NavidromeLibraryService = Depends(get_navidrome_library_service),
|
||||
) -> NavidromeAlbumPage:
|
||||
subsonic_type = "byGenre" if genre else _SORT_MAP.get(sort_by, "alphabeticalByName")
|
||||
try:
|
||||
if genre:
|
||||
subsonic_type = "byGenre"
|
||||
else:
|
||||
subsonic_type = _SORT_MAP.get(sort_by, "alphabeticalByName")
|
||||
items = await service.get_albums(type=subsonic_type, size=limit, offset=offset, genre=genre if genre else None)
|
||||
stats = await service.get_stats()
|
||||
total = stats.total_albums if len(items) >= limit else offset + len(items)
|
||||
return NavidromeAlbumPage(items=items, total=total)
|
||||
items = await service.get_albums(
|
||||
type=subsonic_type, size=limit, offset=offset, genre=genre if genre else None,
|
||||
)
|
||||
except ExternalServiceError as e:
|
||||
logger.error("Navidrome service error getting albums: %s", e)
|
||||
raise HTTPException(status_code=502, detail="Failed to communicate with Navidrome")
|
||||
|
||||
try:
|
||||
stats = await service.get_stats()
|
||||
total = stats.total_albums if len(items) >= limit else offset + len(items)
|
||||
except ExternalServiceError:
|
||||
logger.warning("Navidrome stats unavailable, using heuristic pagination total")
|
||||
total = offset + len(items) + (1 if len(items) >= limit else 0)
|
||||
|
||||
return NavidromeAlbumPage(items=items, total=total)
|
||||
|
||||
|
||||
@router.get("/albums/{album_id}", response_model=NavidromeAlbumDetail)
|
||||
async def get_navidrome_album_detail(
|
||||
|
||||
@@ -42,10 +42,11 @@ async def external_service_error_handler(request: Request, exc: ExternalServiceE
|
||||
|
||||
async def circuit_open_error_handler(request: Request, exc: CircuitOpenError) -> MsgSpecJSONResponse:
|
||||
logger.error("Circuit breaker open: %s - %s %s", exc, request.method, request.url.path)
|
||||
name = exc.breaker_name.replace("_", " ").title() if getattr(exc, "breaker_name", "") else "Service"
|
||||
return error_response(
|
||||
status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
CIRCUIT_BREAKER_OPEN,
|
||||
"Service temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover.",
|
||||
f"{name} is temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover.",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -79,5 +79,25 @@ class NavidromeAuthError(NavidromeApiError):
|
||||
pass
|
||||
|
||||
|
||||
class NavidromeSubsonicError(ExternalServiceError):
|
||||
"""Non-auth error from a valid Subsonic API envelope.
|
||||
|
||||
Raised when Navidrome returns a well-formed ``subsonic-response`` with
|
||||
a non-OK status and an error code that is *not* an authentication
|
||||
failure (codes 40/41). These are potentially transient (e.g. "Library
|
||||
not found or empty" during a rescan) and should be retried but must
|
||||
**not** trip the circuit breaker.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
details: Any = None,
|
||||
code: int | None = None,
|
||||
):
|
||||
super().__init__(message, details)
|
||||
self.code = code
|
||||
|
||||
|
||||
class ClientDisconnectedError(MusicseerrException):
|
||||
pass
|
||||
|
||||
@@ -160,7 +160,9 @@ class CircuitBreaker:
|
||||
|
||||
|
||||
class CircuitOpenError(Exception):
|
||||
pass
|
||||
def __init__(self, message: str, breaker_name: str = ""):
|
||||
super().__init__(message)
|
||||
self.breaker_name = breaker_name
|
||||
|
||||
|
||||
def _get_retry_after_seconds(exception: Exception) -> Optional[float]:
|
||||
@@ -206,7 +208,8 @@ def with_retry(
|
||||
extra={"service_name": service_name, "function": func_name}
|
||||
)
|
||||
raise CircuitOpenError(
|
||||
f"Circuit breaker '{circuit_breaker.name}' is OPEN"
|
||||
f"Circuit breaker '{circuit_breaker.name}' is OPEN",
|
||||
breaker_name=circuit_breaker.name,
|
||||
)
|
||||
|
||||
last_exception = None
|
||||
@@ -293,7 +296,7 @@ def with_retry(
|
||||
|
||||
if circuit_breaker and last_exception:
|
||||
is_non_breaking = isinstance(last_exception, non_breaking_exceptions) if non_breaking_exceptions else False
|
||||
if not is_non_breaking or circuit_breaker.state == CircuitState.HALF_OPEN:
|
||||
if not is_non_breaking:
|
||||
await circuit_breaker.arecord_failure()
|
||||
|
||||
raise last_exception
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import Any
|
||||
|
||||
import msgspec
|
||||
|
||||
from core.exceptions import NavidromeApiError, NavidromeAuthError
|
||||
from core.exceptions import NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -88,7 +88,7 @@ def parse_subsonic_response(data: dict[str, Any]) -> dict[str, Any]:
|
||||
message = error.get("message", "Unknown Subsonic API error")
|
||||
if code in (40, 41):
|
||||
raise NavidromeAuthError(message, code=code)
|
||||
raise NavidromeApiError(message, code=code)
|
||||
raise NavidromeSubsonicError(message, code=code)
|
||||
return resp
|
||||
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from urllib.parse import urlencode
|
||||
import httpx
|
||||
import msgspec
|
||||
|
||||
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError
|
||||
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
|
||||
from infrastructure.cache.cache_keys import NAVIDROME_PREFIX
|
||||
from infrastructure.cache.memory_cache import CacheInterface
|
||||
from infrastructure.resilience.retry import with_retry, CircuitBreaker
|
||||
@@ -210,6 +210,7 @@ class NavidromeRepository:
|
||||
max_delay=5.0,
|
||||
circuit_breaker=_navidrome_circuit_breaker,
|
||||
retriable_exceptions=(httpx.HTTPError, ExternalServiceError),
|
||||
non_breaking_exceptions=(NavidromeSubsonicError,),
|
||||
)
|
||||
async def _request(
|
||||
self,
|
||||
|
||||
@@ -144,8 +144,8 @@ async def test_circuit_still_opens_for_real_errors_amid_rate_limits():
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_breaking_in_half_open_reopens_circuit():
|
||||
"""Non-breaking exceptions in HALF_OPEN must still reopen the circuit."""
|
||||
async def test_non_breaking_in_half_open_stays_half_open():
|
||||
"""Non-breaking exceptions in HALF_OPEN keep the circuit HALF_OPEN (service is reachable)."""
|
||||
cb = CircuitBreaker(failure_threshold=2, success_threshold=2, timeout=0.01, name="test-half-open")
|
||||
|
||||
for _ in range(2):
|
||||
@@ -170,7 +170,7 @@ async def test_non_breaking_in_half_open_reopens_circuit():
|
||||
with pytest.raises(_RateLimited):
|
||||
await rate_limited_in_half_open()
|
||||
|
||||
assert cb.state == CircuitState.OPEN
|
||||
assert cb.state == CircuitState.HALF_OPEN
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -6,7 +6,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError
|
||||
from core.exceptions import ExternalServiceError, NavidromeApiError, NavidromeAuthError, NavidromeSubsonicError
|
||||
from repositories.navidrome_repository import _navidrome_circuit_breaker
|
||||
from repositories.navidrome_models import (
|
||||
SubsonicAlbum,
|
||||
@@ -86,14 +86,14 @@ class TestParseSubsonicResponse:
|
||||
resp = parse_subsonic_response(data)
|
||||
assert resp["status"] == "ok"
|
||||
|
||||
def test_error_status_raises_api_error(self):
|
||||
def test_error_status_raises_subsonic_error(self):
|
||||
data = {
|
||||
"subsonic-response": {
|
||||
"status": "failed",
|
||||
"error": {"code": 70, "message": "Not found"},
|
||||
}
|
||||
}
|
||||
with pytest.raises(NavidromeApiError, match="Not found"):
|
||||
with pytest.raises(NavidromeSubsonicError, match="Not found"):
|
||||
parse_subsonic_response(data)
|
||||
|
||||
def test_auth_error_code_40(self):
|
||||
@@ -333,6 +333,63 @@ class TestErrorHandling:
|
||||
await repo._request("/rest/ping")
|
||||
|
||||
|
||||
class TestCircuitBreakerNonBreaking:
|
||||
"""Verify NavidromeSubsonicError doesn't trip the circuit breaker."""
|
||||
|
||||
def setup_method(self):
|
||||
_navidrome_circuit_breaker.reset()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_subsonic_error_does_not_open_circuit_breaker(self):
|
||||
"""Repeated SubsonicErrors (e.g. 'Library not found') should NOT open the CB."""
|
||||
repo, client, _ = _make_repo()
|
||||
error_envelope = {
|
||||
"subsonic-response": {
|
||||
"status": "failed",
|
||||
"error": {"code": 70, "message": "Library not found or empty"},
|
||||
}
|
||||
}
|
||||
client.get = AsyncMock(return_value=_mock_response(error_envelope))
|
||||
|
||||
with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
|
||||
for _ in range(10):
|
||||
with pytest.raises(NavidromeSubsonicError):
|
||||
await repo._request("/rest/getAlbumList2")
|
||||
|
||||
from infrastructure.resilience.retry import CircuitState
|
||||
assert _navidrome_circuit_breaker.state == CircuitState.CLOSED
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auth_error_still_trips_circuit_breaker(self):
|
||||
"""Auth errors (NavidromeAuthError) should still record CB failures."""
|
||||
from infrastructure.resilience.retry import CircuitOpenError, CircuitState
|
||||
|
||||
repo, client, _ = _make_repo()
|
||||
client.get = AsyncMock(return_value=_mock_response({}, status_code=401))
|
||||
|
||||
with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
|
||||
for _ in range(10):
|
||||
with pytest.raises((NavidromeAuthError, ExternalServiceError, CircuitOpenError)):
|
||||
await repo._request("/rest/ping")
|
||||
|
||||
assert _navidrome_circuit_breaker.state == CircuitState.OPEN
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connection_error_still_trips_circuit_breaker(self):
|
||||
"""Transport-level errors should still record CB failures."""
|
||||
from infrastructure.resilience.retry import CircuitOpenError, CircuitState
|
||||
|
||||
repo, client, _ = _make_repo()
|
||||
client.get = AsyncMock(side_effect=httpx.ConnectError("refused"))
|
||||
|
||||
with patch("infrastructure.resilience.retry.asyncio.sleep", new_callable=AsyncMock):
|
||||
for _ in range(10):
|
||||
with pytest.raises((ExternalServiceError, CircuitOpenError)):
|
||||
await repo._request("/rest/ping")
|
||||
|
||||
assert _navidrome_circuit_breaker.state == CircuitState.OPEN
|
||||
|
||||
|
||||
class TestValidateConnection:
|
||||
def setup_method(self):
|
||||
_navidrome_circuit_breaker.reset()
|
||||
|
||||
@@ -91,6 +91,25 @@ class TestLibraryAlbums:
|
||||
assert data["items"][0]["navidrome_id"] == "a1"
|
||||
assert data["total"] == 1
|
||||
|
||||
def test_get_albums_stats_fallback(self, library_client, mock_library_service):
|
||||
"""When stats fails, albums still returns with heuristic total."""
|
||||
mock_library_service.get_albums = AsyncMock(return_value=[_album_summary(id=f"a{i}") for i in range(48)])
|
||||
mock_library_service.get_stats = AsyncMock(side_effect=ExternalServiceError("Library not found"))
|
||||
resp = library_client.get("/navidrome/albums?limit=48")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert len(data["items"]) == 48
|
||||
assert data["total"] == 49 # offset(0) + 48 + 1 (full page heuristic)
|
||||
|
||||
def test_get_albums_stats_fallback_partial_page(self, library_client, mock_library_service):
|
||||
"""Partial page + stats failure → total = offset + len(items), no +1."""
|
||||
mock_library_service.get_albums = AsyncMock(return_value=[_album_summary(id=f"a{i}") for i in range(5)])
|
||||
mock_library_service.get_stats = AsyncMock(side_effect=ExternalServiceError("down"))
|
||||
resp = library_client.get("/navidrome/albums?limit=48")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["total"] == 5
|
||||
|
||||
def test_get_album_detail(self, library_client):
|
||||
resp = library_client.get("/navidrome/albums/a1")
|
||||
assert resp.status_code == 200
|
||||
|
||||
@@ -26,7 +26,10 @@ def _build_app() -> FastAPI:
|
||||
|
||||
@app.get("/raise-circuit")
|
||||
async def raise_circuit():
|
||||
raise CircuitOpenError("JellyfinRepository after 5 failures")
|
||||
raise CircuitOpenError(
|
||||
"Circuit breaker 'jellyfin' is OPEN",
|
||||
breaker_name="jellyfin",
|
||||
)
|
||||
|
||||
app.add_exception_handler(ExternalServiceError, external_service_error_handler)
|
||||
app.add_exception_handler(CircuitOpenError, circuit_open_error_handler)
|
||||
@@ -70,5 +73,23 @@ async def test_circuit_open_error_hides_details():
|
||||
|
||||
body = resp.json()
|
||||
assert resp.status_code == 503
|
||||
assert body["error"]["message"] == "Service temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover."
|
||||
assert "JellyfinRepository" not in resp.text
|
||||
assert body["error"]["message"] == "Jellyfin is temporarily unavailable due to repeated connection failures. Check your settings or wait for the service to recover."
|
||||
assert "circuit breaker" not in resp.text.lower() or "CIRCUIT_BREAKER_OPEN" in resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_circuit_open_error_without_breaker_name_falls_back():
|
||||
app = FastAPI()
|
||||
|
||||
@app.get("/raise-circuit-unnamed")
|
||||
async def raise_circuit_unnamed():
|
||||
raise CircuitOpenError("CB tripped")
|
||||
|
||||
app.add_exception_handler(CircuitOpenError, circuit_open_error_handler)
|
||||
transport = httpx.ASGITransport(app=app, raise_app_exceptions=False)
|
||||
async with httpx.AsyncClient(transport=transport, base_url="http://test") as client:
|
||||
resp = await client.get("/raise-circuit-unnamed")
|
||||
|
||||
body = resp.json()
|
||||
assert resp.status_code == 503
|
||||
assert body["error"]["message"].startswith("Service is temporarily unavailable")
|
||||
|
||||
Reference in New Issue
Block a user