import asyncio
import hashlib
import json
import logging
import shutil
import time
from pathlib import Path
from typing import Any

from infrastructure.serialization import to_jsonable

logger = logging.getLogger(__name__)


def _encode_json(value: Any) -> str:
    """Serialize *value* to compact, ASCII-only JSON text."""
    return json.dumps(value, ensure_ascii=True, separators=(",", ":"))


def _decode_json(text: str) -> Any:
    """Parse JSON *text* and return the decoded Python value."""
    return json.loads(text)


class DiskMetadataCache:
    """File-backed cache for album, artist and AudioDB metadata.

    Every entry is a JSON data file named after the SHA-1 hex digest of its
    identifier, with a ``.meta.json`` sidecar next to it that records
    ``created_at``, ``last_accessed`` and, optionally, ``expires_at``
    (all ``time.time()`` timestamps). Entries live below ``base_path`` in
    either a ``recent/`` or a ``persistent/`` sub-tree.
    """

    def __init__(
        self,
        base_path: Path,
        recent_metadata_max_size_mb: int = 128,
        recent_covers_max_size_mb: int = 0,
        persistent_metadata_ttl_hours: int = 24,
    ):
        """Configure limits and create the cache directory tree.

        Args:
            base_path: Root directory of the cache.
            recent_metadata_max_size_mb: Size budget used by
                ``enforce_recent_size_limits``; values <= 0 disable it.
            recent_covers_max_size_mb: Size budget used by
                ``enforce_cover_size_limits``; values <= 0 disable it.
            persistent_metadata_ttl_hours: Default lifetime in hours
                (clamped to at least 1) given to non-monitored entries
                stored without an explicit TTL.
        """
        self.base_path = Path(base_path)
        self.recent_metadata_max_size_bytes = max(recent_metadata_max_size_mb, 0) * 1024 * 1024
        self.recent_covers_max_size_bytes = max(recent_covers_max_size_mb, 0) * 1024 * 1024
        self.default_ttl_seconds = max(persistent_metadata_ttl_hours, 1) * 3600

        recent_root = self.base_path / "recent"
        persistent_root = self.base_path / "persistent"
        self._recent_albums_dir = recent_root / "albums"
        self._recent_artists_dir = recent_root / "artists"
        self._recent_covers_dir = recent_root / "covers"
        self._persistent_albums_dir = persistent_root / "albums"
        self._persistent_artists_dir = persistent_root / "artists"
        self._recent_audiodb_artists_dir = recent_root / "audiodb_artists"
        self._recent_audiodb_albums_dir = recent_root / "audiodb_albums"
        self._persistent_audiodb_artists_dir = persistent_root / "audiodb_artists"
        self._persistent_audiodb_albums_dir = persistent_root / "audiodb_albums"
        self._ensure_dirs()

    def _ensure_dirs(self) -> None:
        """Create every cache directory that does not exist yet."""
        for path in (
            self._recent_albums_dir,
            self._recent_artists_dir,
            self._recent_covers_dir,
            self._persistent_albums_dir,
            self._persistent_artists_dir,
            self._recent_audiodb_artists_dir,
            self._recent_audiodb_albums_dir,
            self._persistent_audiodb_artists_dir,
            self._persistent_audiodb_albums_dir,
        ):
            path.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _cache_hash(identifier: str) -> str:
        """Return the SHA-1 hex digest of *identifier*, used as a file stem."""
        # The digest only names files; flagging it as non-security keeps the
        # call usable on builds that restrict SHA-1. The digest is unchanged.
        return hashlib.sha1(identifier.encode(), usedforsecurity=False).hexdigest()

    @staticmethod
    def _meta_path(file_path: Path) -> Path:
        """Return the sidecar path: *file_path* with its suffix set to ``.meta.json``."""
        return file_path.with_suffix(".meta.json")

    def _entity_paths(self, entity_type: str, identifier: str) -> tuple[Path, Path]:
        """Return the ``(recent, persistent)`` data file paths for an entity.

        Raises:
            ValueError: If *entity_type* is not ``album``, ``artist``,
                ``audiodb_artist`` or ``audiodb_album``.
        """
        cache_hash = self._cache_hash(identifier)
        directories = {
            "album": (self._recent_albums_dir, self._persistent_albums_dir),
            "artist": (self._recent_artists_dir, self._persistent_artists_dir),
            "audiodb_artist": (
                self._recent_audiodb_artists_dir,
                self._persistent_audiodb_artists_dir,
            ),
            "audiodb_album": (
                self._recent_audiodb_albums_dir,
                self._persistent_audiodb_albums_dir,
            ),
        }
        try:
            recent_dir, persistent_dir = directories[entity_type]
        except KeyError:
            raise ValueError(f"Unsupported entity type: {entity_type}") from None
        file_name = f"{cache_hash}.json"
        return recent_dir / file_name, persistent_dir / file_name

    def _delete_file_pair(self, file_path: Path) -> None:
        """Remove a data file and its sidecar; missing files are ignored."""
        file_path.unlink(missing_ok=True)
        self._meta_path(file_path).unlink(missing_ok=True)

    def _load_meta(self, meta_path: Path) -> dict[str, Any]:
        """Read a sidecar file, returning ``{}`` when it is missing or unusable.

        "Unusable" covers unreadable files, undecodable bytes, invalid JSON
        and JSON whose top-level value is not an object.
        """
        try:
            payload = _decode_json(meta_path.read_text(encoding="utf-8"))
        except (ValueError, OSError, TypeError):
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError; OSError covers a missing file.
            return {}
        return payload if isinstance(payload, dict) else {}

    @staticmethod
    def _is_expired(meta: dict[str, Any]) -> bool:
        """Return True when *meta* has a numeric ``expires_at`` in the past."""
        expires_at = meta.get("expires_at")
        return isinstance(expires_at, (int, float)) and time.time() > float(expires_at)

    @staticmethod
    def _last_access_time(meta: dict[str, Any]) -> float:
        """Return the eviction sort key for *meta*.

        Uses ``last_accessed``, falling back to ``created_at`` when that key
        is absent. Falsy or non-numeric values yield ``0.0`` so a damaged
        sidecar makes its entry the first eviction candidate instead of
        aborting the whole pass.
        """
        raw = meta.get("last_accessed", meta.get("created_at", 0.0))
        try:
            return float(raw or 0.0)
        except (TypeError, ValueError):
            return 0.0

    def _cleanup_expired_directory(self, directory: Path) -> int:
        """Delete expired entries in *directory* and return how many were removed.

        A data file plus its sidecar counts as one removal; an expired
        sidecar without a data file also counts as one.
        """
        removed = 0
        handled_meta_paths: set[Path] = set()
        for data_path in directory.iterdir():
            if not data_path.is_file() or data_path.name.endswith(".meta.json"):
                continue
            meta_path = self._meta_path(data_path)
            handled_meta_paths.add(meta_path)
            if self._is_expired(self._load_meta(meta_path)):
                self._delete_file_pair(data_path)
                removed += 1

        # Second pass: sidecars whose data file is gone.
        for meta_path in directory.glob("*.meta.json"):
            if meta_path in handled_meta_paths:
                continue
            if self._is_expired(self._load_meta(meta_path)):
                meta_path.unlink(missing_ok=True)
                removed += 1
        return removed

    def _evict_least_recent(self, data_paths: list[Path], max_size_bytes: int) -> int:
        """Delete least-recently-accessed files until the total fits the budget.

        Args:
            data_paths: Candidate data files (sidecars excluded by the caller).
            max_size_bytes: Allowed combined size of the data files.

        Returns:
            Number of data-file bytes freed, or 0 if already within budget.
        """
        candidates: list[tuple[float, Path, int]] = []
        total_size = 0
        for data_path in data_paths:
            try:
                size_bytes = data_path.stat().st_size
            except FileNotFoundError:
                # The file vanished between listing and stat; nothing to count.
                continue
            meta = self._load_meta(self._meta_path(data_path))
            candidates.append((self._last_access_time(meta), data_path, size_bytes))
            total_size += size_bytes

        if total_size <= max_size_bytes:
            return 0

        bytes_to_free = total_size - max_size_bytes
        freed = 0
        for _, data_path, size_bytes in sorted(candidates, key=lambda item: item[0]):
            self._delete_file_pair(data_path)
            freed += size_bytes
            if freed >= bytes_to_free:
                break
        return freed

    def _enforce_size_limit_for_directory(self, directory: Path, max_size_bytes: int) -> int:
        """Evict old files from *directory* until it fits *max_size_bytes*.

        Returns the number of bytes freed; a limit <= 0 disables eviction.
        """
        if max_size_bytes <= 0:
            return 0
        data_paths = [
            data_path
            for data_path in directory.iterdir()
            if data_path.is_file() and not data_path.name.endswith(".meta.json")
        ]
        return self._evict_least_recent(data_paths, max_size_bytes)

    def _write_json_entry(
        self, file_path: Path, payload: dict[str, Any], expires_at: float | None
    ) -> None:
        """Write *payload* to *file_path* and a fresh sidecar next to it.

        The sidecar gets ``created_at`` and ``last_accessed`` set to now, and
        ``expires_at`` only when one is supplied.
        """
        file_path.parent.mkdir(parents=True, exist_ok=True)
        now = time.time()
        file_path.write_text(_encode_json(payload), encoding="utf-8")
        meta: dict[str, Any] = {"created_at": now, "last_accessed": now}
        if expires_at is not None:
            meta["expires_at"] = expires_at
        self._meta_path(file_path).write_text(_encode_json(meta), encoding="utf-8")

    def _read_json_entry(self, file_path: Path, honor_expiry: bool) -> dict[str, Any] | None:
        """Load a cached entry, or return ``None`` if it is absent or invalid.

        Expired entries (when *honor_expiry* is true) and entries whose data
        file cannot be read as a JSON object are deleted. On a successful
        read, an existing sidecar gets its ``last_accessed`` refreshed.
        """
        if not file_path.exists():
            return None

        meta_path = self._meta_path(file_path)
        meta = self._load_meta(meta_path)
        if honor_expiry and self._is_expired(meta):
            self._delete_file_pair(file_path)
            return None

        try:
            payload = _decode_json(file_path.read_text(encoding="utf-8"))
        except (ValueError, OSError, TypeError):
            payload = None
        if not isinstance(payload, dict):
            # Unreadable, corrupt or non-object data is useless: drop it.
            self._delete_file_pair(file_path)
            return None

        if meta_path.exists():
            meta["last_accessed"] = time.time()
            try:
                meta_path.write_text(_encode_json(meta), encoding="utf-8")
            except OSError as exc:
                # Best effort only: a failed touch must not fail the read.
                logger.debug(
                    "Failed to update disk cache access time for %s: %s", meta_path, exc
                )
        return payload

    async def _set_entity(
        self,
        entity_type: str,
        identifier: str,
        payload: Any,
        is_monitored: bool,
        ttl_seconds: int | None,
    ) -> None:
        """Store *payload* for an entity in the recent or persistent tree.

        Monitored entities are written to the persistent tree, all others to
        the recent tree; any copy in the other tree is deleted. An explicit
        *ttl_seconds* (minimum 1) always sets an expiry. Without one,
        non-monitored entries get the default TTL and monitored entries
        never expire.

        Raises:
            TypeError: If ``to_jsonable(payload)`` is not a ``dict``.
            ValueError: If *entity_type* is unsupported.
        """
        jsonable = to_jsonable(payload)
        if not isinstance(jsonable, dict):
            raise TypeError(
                f"Expected mapping payload for {entity_type} cache, got {type(jsonable)!r}"
            )
        recent_path, persistent_path = self._entity_paths(entity_type, identifier)

        def operation() -> None:
            target_path = persistent_path if is_monitored else recent_path
            other_path = recent_path if is_monitored else persistent_path
            self._delete_file_pair(other_path)
            expires_at = None
            if ttl_seconds is not None:
                expires_at = time.time() + max(ttl_seconds, 1)
            elif not is_monitored:
                expires_at = time.time() + max(self.default_ttl_seconds, 1)
            self._write_json_entry(target_path, jsonable, expires_at)

        await asyncio.to_thread(operation)

    async def _get_entity(self, entity_type: str, identifier: str) -> dict[str, Any] | None:
        """Return the cached payload, checking the persistent copy before the recent one."""
        recent_path, persistent_path = self._entity_paths(entity_type, identifier)

        def operation() -> dict[str, Any] | None:
            persistent_payload = self._read_json_entry(persistent_path, honor_expiry=True)
            if persistent_payload is not None:
                return persistent_payload
            return self._read_json_entry(recent_path, honor_expiry=True)

        return await asyncio.to_thread(operation)

    async def set_album(
        self,
        musicbrainz_id: str,
        album_info: Any,
        is_monitored: bool = False,
        ttl_seconds: int | None = None,
    ) -> None:
        """Cache *album_info* under *musicbrainz_id* as an ``album`` entry."""
        await self._set_entity("album", musicbrainz_id, album_info, is_monitored, ttl_seconds)

    async def get_album(self, musicbrainz_id: str) -> dict[str, Any] | None:
        """Return the cached ``album`` entry for *musicbrainz_id*, if any."""
        return await self._get_entity("album", musicbrainz_id)

    async def set_artist(
        self,
        musicbrainz_id: str,
        artist_info: Any,
        is_monitored: bool = False,
        ttl_seconds: int | None = None,
    ) -> None:
        """Cache *artist_info* under *musicbrainz_id* as an ``artist`` entry."""
        await self._set_entity("artist", musicbrainz_id, artist_info, is_monitored, ttl_seconds)

    async def get_artist(self, musicbrainz_id: str) -> dict[str, Any] | None:
        """Return the cached ``artist`` entry for *musicbrainz_id*, if any."""
        return await self._get_entity("artist", musicbrainz_id)

    async def set_audiodb_artist(
        self,
        identifier: str,
        payload: Any,
        is_monitored: bool = False,
        ttl_seconds: int | None = None,
    ) -> None:
        """Cache *payload* under *identifier* as an ``audiodb_artist`` entry."""
        await self._set_entity("audiodb_artist", identifier, payload, is_monitored, ttl_seconds)

    async def get_audiodb_artist(self, identifier: str) -> dict[str, Any] | None:
        """Return the cached ``audiodb_artist`` entry for *identifier*, if any."""
        return await self._get_entity("audiodb_artist", identifier)

    async def set_audiodb_album(
        self,
        identifier: str,
        payload: Any,
        is_monitored: bool = False,
        ttl_seconds: int | None = None,
    ) -> None:
        """Cache *payload* under *identifier* as an ``audiodb_album`` entry."""
        await self._set_entity("audiodb_album", identifier, payload, is_monitored, ttl_seconds)

    async def get_audiodb_album(self, identifier: str) -> dict[str, Any] | None:
        """Return the cached ``audiodb_album`` entry for *identifier*, if any."""
        return await self._get_entity("audiodb_album", identifier)

    async def delete_album(self, musicbrainz_id: str) -> None:
        """Delete the recent and persistent ``album`` entries for *musicbrainz_id*."""
        await self.delete_entity("album", musicbrainz_id)

    async def delete_artist(self, musicbrainz_id: str) -> None:
        """Delete the recent and persistent ``artist`` entries for *musicbrainz_id*."""
        await self.delete_entity("artist", musicbrainz_id)

    async def delete_entity(self, entity_type: str, identifier: str) -> None:
        """Delete both the recent and persistent copies of an entity.

        Raises:
            ValueError: If *entity_type* is unsupported.
        """
        recent_path, persistent_path = self._entity_paths(entity_type, identifier)

        def operation() -> None:
            self._delete_file_pair(recent_path)
            self._delete_file_pair(persistent_path)

        await asyncio.to_thread(operation)

    async def promote_to_persistent(self, identifier: str, identifier_type: str) -> bool:
        """Move a recent entry into the persistent tree and drop its expiry.

        Args:
            identifier: Identifier the entry was cached under.
            identifier_type: ``"artist"`` selects the artist cache; any other
                value is treated as ``"album"``.

        Returns:
            True if a persistent copy exists afterwards (already present or
            just promoted), False if there was nothing to promote.
        """
        entity_type = "artist" if identifier_type == "artist" else "album"
        recent_path, persistent_path = self._entity_paths(entity_type, identifier)

        def operation() -> bool:
            if persistent_path.exists():
                return True
            if not recent_path.exists():
                return False

            persistent_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(recent_path), str(persistent_path))

            recent_meta = self._meta_path(recent_path)
            persistent_meta = self._meta_path(persistent_path)
            if recent_meta.exists():
                # _load_meta tolerates a corrupt sidecar, so a bad file can no
                # longer abort the promotion after the data was already moved.
                meta = self._load_meta(recent_meta)
                meta.pop("expires_at", None)
                meta["last_accessed"] = time.time()
                persistent_meta.write_text(_encode_json(meta), encoding="utf-8")
                recent_meta.unlink(missing_ok=True)
            else:
                # A leftover persistent sidecar could carry an old expires_at
                # and make the freshly promoted entry expire; remove it.
                persistent_meta.unlink(missing_ok=True)
            return True

        return await asyncio.to_thread(operation)

    async def promote_album_to_persistent(self, musicbrainz_id: str) -> bool:
        """Promote the recent ``album`` entry for *musicbrainz_id*."""
        return await self.promote_to_persistent(musicbrainz_id, "album")

    async def promote_artist_to_persistent(self, musicbrainz_id: str) -> bool:
        """Promote the recent ``artist`` entry for *musicbrainz_id*."""
        return await self.promote_to_persistent(musicbrainz_id, "artist")

    async def cleanup_expired_recent(self) -> int:
        """Remove expired entries from the recent metadata directories.

        Returns the number of entries removed. The covers directory is
        handled separately by ``cleanup_expired_covers``.
        """

        def operation() -> int:
            return sum(
                self._cleanup_expired_directory(base_dir)
                for base_dir in (
                    self._recent_albums_dir,
                    self._recent_artists_dir,
                    self._recent_audiodb_artists_dir,
                    self._recent_audiodb_albums_dir,
                )
            )

        return await asyncio.to_thread(operation)

    async def enforce_recent_size_limits(self) -> int:
        """Evict least-recently-accessed recent metadata to fit the size budget.

        The budget applies to the combined size of the ``*.json`` data files
        in all recent metadata directories. Returns the number of bytes
        freed; a budget <= 0 disables eviction.
        """
        max_size_bytes = self.recent_metadata_max_size_bytes
        if max_size_bytes <= 0:
            return 0

        def operation() -> int:
            data_paths: list[Path] = []
            for base_dir in (
                self._recent_albums_dir,
                self._recent_artists_dir,
                self._recent_audiodb_artists_dir,
                self._recent_audiodb_albums_dir,
            ):
                data_paths.extend(
                    data_path
                    for data_path in base_dir.glob("*.json")
                    if not data_path.name.endswith(".meta.json")
                )
            return self._evict_least_recent(data_paths, max_size_bytes)

        return await asyncio.to_thread(operation)

    async def cleanup_expired_covers(self) -> int:
        """Remove expired files from the recent covers directory; return the count."""
        return await asyncio.to_thread(self._cleanup_expired_directory, self._recent_covers_dir)

    async def enforce_cover_size_limits(self) -> int:
        """Evict least-recently-accessed covers to fit the covers size budget.

        Returns the number of bytes freed.
        """
        return await asyncio.to_thread(
            self._enforce_size_limit_for_directory,
            self._recent_covers_dir,
            self.recent_covers_max_size_bytes,
        )

    def get_stats(self) -> dict[str, Any]:
        """Return entry counts per entity type and the total data size in bytes.

        Counts recent and persistent metadata entries (not covers). This
        method scans the directories synchronously.
        """
        counts = {"album": 0, "artist": 0, "audiodb_artist": 0, "audiodb_album": 0}
        total_size_bytes = 0
        for base_dir, counter_name in (
            (self._recent_albums_dir, "album"),
            (self._persistent_albums_dir, "album"),
            (self._recent_artists_dir, "artist"),
            (self._persistent_artists_dir, "artist"),
            (self._recent_audiodb_artists_dir, "audiodb_artist"),
            (self._persistent_audiodb_artists_dir, "audiodb_artist"),
            (self._recent_audiodb_albums_dir, "audiodb_album"),
            (self._persistent_audiodb_albums_dir, "audiodb_album"),
        ):
            for data_path in base_dir.glob("*.json"):
                if data_path.name.endswith(".meta.json"):
                    continue
                counts[counter_name] += 1
                try:
                    total_size_bytes += data_path.stat().st_size
                except FileNotFoundError:
                    # Removed after listing: still counted, contributes no size.
                    pass
        return {
            "total_count": sum(counts.values()),
            "album_count": counts["album"],
            "artist_count": counts["artist"],
            "audiodb_artist_count": counts["audiodb_artist"],
            "audiodb_album_count": counts["audiodb_album"],
            "total_size_bytes": total_size_bytes,
        }

    async def clear_all(self) -> None:
        """Delete the whole cache tree and recreate the empty directories."""

        def operation() -> None:
            if self.base_path.exists():
                shutil.rmtree(self.base_path)
            self._ensure_dirs()

        await asyncio.to_thread(operation)

    async def clear_audiodb(self) -> None:
        """Delete all AudioDB cache directories and recreate them empty."""

        def operation() -> None:
            for directory in (
                self._recent_audiodb_artists_dir,
                self._recent_audiodb_albums_dir,
                self._persistent_audiodb_artists_dir,
                self._persistent_audiodb_albums_dir,
            ):
                if directory.exists():
                    shutil.rmtree(directory)
            self._ensure_dirs()

        await asyncio.to_thread(operation)