Files
musicseerr/backend/repositories/coverart_disk_cache.py
T
2026-04-03 15:53:00 +01:00

420 lines
16 KiB
Python

import asyncio
import hashlib
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
import aiofiles
import msgspec
logger = logging.getLogger(__name__)
def _encode_json(data: object) -> str:
    """Serialize *data* to a JSON string using msgspec."""
    raw_bytes = msgspec.json.encode(data)
    return raw_bytes.decode("utf-8")
def _decode_json(text: str) -> dict[str, Any]:
    """Parse a JSON object from *text* into a ``dict`` keyed by strings."""
    raw_bytes = text.encode("utf-8")
    return msgspec.json.decode(raw_bytes, type=dict[str, Any])
def _log_task_error(task: asyncio.Task) -> None:
if not task.cancelled() and task.exception():
logger.error(f"Background task failed: {task.exception()}")
# Media types (without parameters) that are accepted as image payloads.
VALID_IMAGE_CONTENT_TYPES = frozenset({
    "image/jpeg",
    "image/jpg",
    "image/png",
    "image/gif",
    "image/webp",
    "image/avif",
    "image/svg+xml",
})


def is_valid_image_content_type(content_type: str) -> bool:
    """Return True when *content_type* names one of the accepted image types.

    Anything after the first ``;`` (e.g. a charset parameter) is ignored, and
    the comparison is case-insensitive and whitespace-tolerant. Empty or
    missing values are rejected.
    """
    if content_type:
        media_type, _, _ = content_type.partition(";")
        return media_type.strip().lower() in VALID_IMAGE_CONTENT_TYPES
    return False
def get_cache_filename(identifier: str, suffix: str = "") -> str:
    """Return the SHA-1 hex digest of ``"<identifier>:<suffix>"``.

    The digest is used as the extension-less file name of a cache entry.
    """
    hasher = hashlib.sha1()
    hasher.update(f"{identifier}:{suffix}".encode())
    return hasher.hexdigest()
class CoverDiskCache:
    """Disk-backed cover-art cache with JSON metadata sidecars.

    An entry consists of a content file (``get_file_path`` yields
    ``<sha1>.bin`` inside ``cache_dir``) plus sidecar files that share its
    stem: ``.meta.json`` (content type, timestamps, optional expiry,
    ``is_monitored`` flag, content hash) and, optionally, ``.wikidata``.

    Entries whose metadata has ``is_monitored`` set are written without an
    expiry and are skipped by size-based eviction. All other entries get an
    ``expires_at`` timestamp and are evicted least-recently-accessed first
    when the cache exceeds its size limit.
    """

    def __init__(
        self,
        cache_dir: Path,
        max_size_mb: Optional[int] = None,
        eviction_check_interval_seconds: int = 60,
        non_monitored_ttl_seconds: int = 86400,
    ):
        """Create the cache directory if needed and store the limits.

        Args:
            cache_dir: Directory that holds all cache files.
            max_size_mb: Size limit in MiB; ``None`` or a non-positive value
                disables size-based eviction.
            eviction_check_interval_seconds: Minimum delay between two
                non-forced eviction scans (clamped to at least 1).
            non_monitored_ttl_seconds: Lifetime given to non-monitored
                entries on write (clamped to at least 1).
        """
        self.cache_dir = cache_dir
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.max_size_bytes = max_size_mb * 1024 * 1024 if max_size_mb and max_size_mb > 0 else None
        self._eviction_check_interval_seconds = max(eviction_check_interval_seconds, 1)
        self._non_monitored_ttl_seconds = max(non_monitored_ttl_seconds, 1)
        self._last_eviction_check = 0.0
        self._eviction_lock = asyncio.Lock()

    @staticmethod
    def _remove_entry(file_path: Path) -> None:
        """Delete an entry's content file and both sidecars, ignoring missing files."""
        file_path.unlink(missing_ok=True)
        file_path.with_suffix('.meta.json').unlink(missing_ok=True)
        file_path.with_suffix('.wikidata').unlink(missing_ok=True)

    async def write(
        self,
        file_path: Path,
        content: bytes,
        content_type: str,
        extra_meta: Optional[dict[str, object]] = None,
        is_monitored: bool = False,
    ) -> None:
        """Store *content* and its metadata, then run the size-limit check.

        Args:
            file_path: Destination of the content file.
            content: Raw bytes to store.
            content_type: Value recorded under ``content_type`` in the metadata.
            extra_meta: Additional metadata merged over the generated fields.
                A ``wikidata_url`` key is also written to a ``.wikidata``
                sidecar.
            is_monitored: When true the entry is written without an expiry.

        Failures are logged as warnings and never raised.
        """
        try:
            now = datetime.now().timestamp()
            ttl = None if is_monitored else self._non_monitored_ttl_seconds
            content_sha1 = hashlib.sha1(content).hexdigest()
            meta = {
                'content_type': content_type,
                'created_at': now,
                'last_accessed': now,
                'size_bytes': len(content),
                'is_monitored': is_monitored,
                'content_sha1': content_sha1,
            }
            if ttl:
                meta['expires_at'] = now + ttl
            if extra_meta:
                meta.update(extra_meta)

            async def write_content():
                async with aiofiles.open(file_path, 'wb') as f:
                    await f.write(content)

            async def write_meta():
                meta_path = file_path.with_suffix('.meta.json')
                async with aiofiles.open(meta_path, 'w') as f:
                    await f.write(_encode_json(meta))

            async def write_wikidata():
                if extra_meta and 'wikidata_url' in extra_meta:
                    wikidata_path = file_path.with_suffix('.wikidata')
                    async with aiofiles.open(wikidata_path, 'w') as f:
                        await f.write(str(extra_meta['wikidata_url']))

            # The three files are independent, so write them concurrently.
            await asyncio.gather(write_content(), write_meta(), write_wikidata())
            await self.enforce_size_limit()
        except Exception as e:  # noqa: BLE001
            logger.warning(f"Failed to write disk cache: {e}")

    async def write_negative(
        self,
        file_path: Path,
        ttl_seconds: int = 4 * 3600,
    ) -> None:
        """Record a negative entry for *file_path* that expires after *ttl_seconds*.

        Only the ``.meta.json`` sidecar is written (with ``negative: True``);
        no content file is created. Failures are logged and never raised.
        """
        try:
            now = datetime.now().timestamp()
            meta = {
                "created_at": now,
                "last_accessed": now,
                "expires_at": now + ttl_seconds,
                "negative": True,
                "is_monitored": False,
            }
            meta_path = file_path.with_suffix(".meta.json")
            async with aiofiles.open(meta_path, "w") as f:
                await f.write(_encode_json(meta))
        except Exception as e:  # noqa: BLE001
            logger.warning(f"Failed to write negative disk cache: {e}")

    async def is_negative(self, file_path: Path) -> bool:
        """Return True when an unexpired negative entry exists for *file_path*.

        An expired negative entry is deleted and reported as False. On a hit,
        the entry's ``last_accessed`` is refreshed in a background task. Read
        errors are logged and reported as False.
        """
        meta_path = file_path.with_suffix(".meta.json")
        if not meta_path.exists():
            return False
        try:
            async with aiofiles.open(meta_path, "r") as f:
                meta = _decode_json(await f.read())
            if not meta.get("negative", False):
                return False
            expires_at = meta.get("expires_at")
            if expires_at is None:
                return False
            now = datetime.now().timestamp()
            if now > expires_at:
                meta_path.unlink(missing_ok=True)
                return False
            task = asyncio.create_task(self._update_meta_access(meta_path, meta))
            task.add_done_callback(_log_task_error)
            return True
        except Exception as e:  # noqa: BLE001
            logger.warning(f"Failed to read negative disk cache: {e}")
            return False

    async def read(
        self,
        file_path: Path,
        extra_keys: Optional[list[str]] = None
    ) -> Optional[tuple[bytes, str, Optional[dict]]]:
        """Load a cached entry.

        Args:
            file_path: Path of the content file.
            extra_keys: Extra values to return. Each key is looked up in the
                metadata first, then in a ``.<key>`` sidecar file.

        Returns:
            ``(content, content_type, extra_data)``, where ``content_type``
            falls back to ``image/jpeg`` and ``extra_data`` is ``None`` when
            no extra value was found. ``None`` is returned when the entry is
            missing, empty, expired, or unreadable.
        """
        if not file_path.exists():
            return None
        try:
            meta_path = file_path.with_suffix('.meta.json')

            async def read_content():
                async with aiofiles.open(file_path, 'rb') as f:
                    return await f.read()

            async def read_meta():
                if meta_path.exists():
                    async with aiofiles.open(meta_path, 'r') as f:
                        return _decode_json(await f.read())
                return None

            content, meta = await asyncio.gather(read_content(), read_meta())
            if not content:
                return None

            content_type = 'image/jpeg'
            extra_data = {}
            if meta:
                content_type = meta.get('content_type', content_type)
                if 'expires_at' in meta:
                    now = datetime.now().timestamp()
                    if now > meta['expires_at'] and not meta.get('is_monitored', False):
                        # Remove the whole entry, .wikidata included, so a
                        # stale sidecar cannot be served for a later re-cache
                        # of the same key.
                        self._remove_entry(file_path)
                        return None
                if extra_keys:
                    async def read_extra_key(key: str):
                        if key in meta:
                            return key, meta.get(key)
                        ext_path = file_path.with_suffix(f'.{key}')
                        if ext_path.exists():
                            async with aiofiles.open(ext_path, 'r') as f:
                                return key, await f.read()
                        return key, None

                    results = await asyncio.gather(*[read_extra_key(k) for k in extra_keys])
                    for k, v in results:
                        if v is not None:
                            extra_data[k] = v

            # Refresh last_accessed without delaying the caller.
            task = asyncio.create_task(self._update_meta_access(meta_path, meta))
            task.add_done_callback(_log_task_error)
            return content, content_type, extra_data if extra_data else None
        except Exception as e:  # noqa: BLE001
            logger.warning(f"Failed to read disk cache: {e}")
            return None

    async def _update_meta_access(self, meta_file: Path, meta: dict) -> None:
        """Stamp *meta* with the current time and rewrite *meta_file*.

        Does nothing when *meta* is ``None`` or the file no longer exists.
        ``OSError`` is logged at debug level and swallowed.
        """
        if meta is None or not meta_file.exists():
            return
        try:
            meta['last_accessed'] = datetime.now().timestamp()
            async with aiofiles.open(meta_file, 'w') as f:
                await f.write(_encode_json(meta))
        except OSError as exc:
            logger.debug("Failed to update coverart disk cache meta %s: %s", meta_file, exc)

    async def get_content_hash(self, file_path: Path) -> Optional[str]:
        """Return the SHA-1 hex digest of the cached content, if available.

        The digest stored in the metadata is preferred. When it is absent,
        the content file is hashed and the result is written back to the
        metadata. Expired non-monitored entries are deleted and yield
        ``None``, as do missing, empty, or unreadable entries.
        """
        meta_path = file_path.with_suffix('.meta.json')
        if not meta_path.exists():
            return None
        try:
            async with aiofiles.open(meta_path, 'r') as f:
                meta = _decode_json(await f.read())
            if 'expires_at' in meta and not meta.get('is_monitored', False):
                now = datetime.now().timestamp()
                if now > meta['expires_at']:
                    self._remove_entry(file_path)
                    return None
            content_hash = meta.get('content_sha1')
            if content_hash:
                task = asyncio.create_task(self._update_meta_access(meta_path, meta))
                task.add_done_callback(_log_task_error)
                return str(content_hash)
            if not file_path.exists():
                return None
            async with aiofiles.open(file_path, 'rb') as f:
                content = await f.read()
            if not content:
                return None
            content_hash = hashlib.sha1(content).hexdigest()
            meta['content_sha1'] = content_hash
            # Awaited (not backgrounded) so the computed hash is persisted.
            await self._update_meta_access(meta_path, meta)
            return content_hash
        except Exception as e:  # noqa: BLE001
            logger.warning(f"Failed to get disk cache content hash: {e}")
            return None

    async def enforce_size_limit(self, force: bool = False) -> int:
        """Evict least-recently-accessed entries until the cache fits its limit.

        Args:
            force: Run the scan even if the check interval has not elapsed.

        Returns:
            Number of bytes freed; 0 when no limit is configured, the scan
            was skipped, or the cache is within its limit.

        The total counts every ``*.bin`` file, but entries whose metadata
        marks them ``is_monitored`` are never evicted.
        """
        if self.max_size_bytes is None:
            return 0
        now = datetime.now().timestamp()
        if not force and (now - self._last_eviction_check) < self._eviction_check_interval_seconds:
            return 0
        async with self._eviction_lock:
            # Re-check after acquiring the lock: another caller may have just
            # completed a scan while this one was waiting.
            now = datetime.now().timestamp()
            if not force and (now - self._last_eviction_check) < self._eviction_check_interval_seconds:
                return 0
            self._last_eviction_check = now

            total_bytes = 0
            candidates: list[tuple[float, Path, int]] = []
            for file_path in self.cache_dir.glob('*.bin'):
                try:
                    size_bytes = file_path.stat().st_size
                except FileNotFoundError:
                    continue
                total_bytes += size_bytes
                meta_path = file_path.with_suffix('.meta.json')
                meta: dict = {}
                if meta_path.exists():
                    try:
                        async with aiofiles.open(meta_path, 'r') as f:
                            meta = _decode_json(await f.read())
                    except Exception:  # noqa: BLE001
                        # Unreadable metadata: treat the entry as evictable
                        # with the oldest possible access time.
                        meta = {}
                if meta.get('is_monitored', False):
                    continue
                last_accessed = float(meta.get('last_accessed', meta.get('created_at', 0.0)) or 0.0)
                candidates.append((last_accessed, file_path, size_bytes))

            if total_bytes <= self.max_size_bytes:
                return 0

            bytes_to_free = total_bytes - self.max_size_bytes
            bytes_freed = 0
            candidates.sort(key=lambda item: item[0])
            for _, file_path, size_bytes in candidates:
                self._remove_entry(file_path)
                bytes_freed += size_bytes
                if bytes_freed >= bytes_to_free:
                    break
            if bytes_freed > 0:
                logger.info(
                    "Evicted %d bytes from cover cache (target max=%d bytes)",
                    bytes_freed,
                    self.max_size_bytes,
                )
            return bytes_freed

    async def delete_by_identifiers(self, identifiers: list[tuple[str, str]]) -> int:
        """Delete the entries for the given ``(identifier, suffix)`` pairs.

        Returns:
            How many of the pairs had an existing content file. Sidecars are
            removed even when the content file was absent.
        """
        count = 0
        for identifier, suffix in identifiers:
            bin_path = self.get_file_path(identifier, suffix)
            existed = bin_path.exists()
            self._remove_entry(bin_path)
            if existed:
                count += 1
        return count

    def cleanup_expired(self) -> int:
        """Remove every non-monitored entry whose ``expires_at`` has passed.

        Sync — call via asyncio.to_thread() from background tasks.

        Returns:
            Number of entries removed. Entries with unreadable metadata are
            skipped.
        """
        count = 0
        now = datetime.now().timestamp()
        if not self.cache_dir.exists():
            return 0
        for meta_path in self.cache_dir.glob("*.meta.json"):
            try:
                meta = _decode_json(meta_path.read_text())
            except Exception:  # noqa: BLE001
                continue
            if not meta.get("is_monitored", False) and "expires_at" in meta and meta["expires_at"] < now:
                # This scan starts from the sidecar, so the sibling paths are
                # rebuilt from its stem.
                stem = meta_path.name.removesuffix(".meta.json")
                (self.cache_dir / f"{stem}.bin").unlink(missing_ok=True)
                meta_path.unlink(missing_ok=True)
                (self.cache_dir / f"{stem}.wikidata").unlink(missing_ok=True)
                count += 1
        if count:
            logger.info("Expired cover cache cleanup: removed %d entries", count)
        return count

    def demote_orphaned(self, valid_hashes: set[str]) -> int:
        """Turn monitored entries not listed in *valid_hashes* into expiring ones.

        Sync — call via asyncio.to_thread() from background tasks.

        Args:
            valid_hashes: Cache file stems (as produced by
                ``get_cache_filename``) that must stay monitored.

        Returns:
            Number of entries demoted. A demoted entry has ``is_monitored``
            cleared and expires 48 hours from now. Entries whose metadata
            cannot be read or rewritten are skipped.
        """
        count = 0
        now = datetime.now().timestamp()
        if not self.cache_dir.exists():
            return 0
        for meta_path in self.cache_dir.glob("*.meta.json"):
            try:
                meta = _decode_json(meta_path.read_text())
            except Exception:  # noqa: BLE001
                continue
            if not meta.get("is_monitored", False):
                continue
            stem = meta_path.name.removesuffix(".meta.json")
            if stem in valid_hashes:
                continue
            meta["is_monitored"] = False
            meta["expires_at"] = now + 48 * 3600
            try:
                meta_path.write_text(_encode_json(meta))
            except Exception:  # noqa: BLE001
                continue
            count += 1
        if count:
            logger.info("Demoted %d orphaned monitored covers to expiring", count)
        return count

    def get_file_path(self, identifier: str, suffix: str) -> Path:
        """Return the content-file path (``<sha1>.bin``) for an identifier/suffix pair."""
        cache_filename = get_cache_filename(identifier, suffix)
        return self.cache_dir / f"{cache_filename}.bin"

    async def promote_to_persistent(self, identifier: str, identifier_type: str = "album") -> bool:
        """Mark the cached covers of an album or artist as monitored.

        For ``identifier_type == "album"`` the keys are
        ``("rg_<identifier>", size)``; for any other type they are
        ``("artist_<identifier>_<size>", "img")``, with size in 250 and 500.
        Each entry that exists and is not yet monitored gets
        ``is_monitored`` set and its ``expires_at`` removed.

        Returns:
            True when no error occurred (even if nothing was promoted),
            False when an exception was caught and logged.
        """
        try:
            for size in ("250", "500"):
                if identifier_type == "album":
                    full_id, suffix = f"rg_{identifier}", size
                else:
                    full_id, suffix = f"artist_{identifier}_{size}", "img"
                file_path = self.get_file_path(full_id, suffix)
                meta_path = file_path.with_suffix('.meta.json')
                if not (file_path.exists() and meta_path.exists()):
                    continue
                async with aiofiles.open(meta_path, 'r') as f:
                    meta = _decode_json(await f.read())
                if meta.get('is_monitored', False):
                    continue
                meta['is_monitored'] = True
                meta.pop('expires_at', None)
                async with aiofiles.open(meta_path, 'w') as f:
                    await f.write(_encode_json(meta))
                logger.debug(f"Promoted cover cache to persistent: {identifier_type}={identifier}, size={size}")
            return True
        except Exception as e:  # noqa: BLE001
            logger.warning(f"Failed to promote cover cache to persistent: {e}")
            return False