Files
musicseerr/backend/services/search_service.py
T
Harvey d24e26fb32 In library rework + Monitored/Unmonitored statuses (#50)
* In library rework + Monitored/Unmonitored statuses

* address comments + format
2026-04-16 00:51:13 +01:00

336 lines
14 KiB
Python

import asyncio
import logging
import re
import time
import unicodedata
from math import ceil
from typing import Optional, TYPE_CHECKING
from api.v1.schemas.search import SearchResult, SearchResponse, SuggestResult, SuggestResponse
from repositories.protocols import MusicBrainzRepositoryProtocol, LidarrRepositoryProtocol, CoverArtRepositoryProtocol
from services.preferences_service import PreferencesService
from infrastructure.http.deduplication import deduplicate
if TYPE_CHECKING:
from services.audiodb_image_service import AudioDBImageService
from services.audiodb_browse_queue import AudioDBBrowseQueue
logger = logging.getLogger(__name__)
COVER_PREFETCH_LIMIT = 12
SEARCH_CACHE_TTL = 90
SEARCH_CACHE_MAX_SIZE = 200
TOP_RESULT_SCORE_THRESHOLD = 90
class SearchService:
_search_cache: dict[str, tuple[float, SearchResponse]] = {}
def __init__(
self,
mb_repo: MusicBrainzRepositoryProtocol,
lidarr_repo: LidarrRepositoryProtocol,
coverart_repo: CoverArtRepositoryProtocol,
preferences_service: PreferencesService,
audiodb_image_service: "AudioDBImageService | None" = None,
audiodb_browse_queue: "AudioDBBrowseQueue | None" = None,
):
self._mb_repo = mb_repo
self._lidarr_repo = lidarr_repo
self._coverart_repo = coverart_repo
self._preferences_service = preferences_service
self._audiodb_image_service = audiodb_image_service
self._audiodb_browse_queue = audiodb_browse_queue
async def _safe_gather(self, *tasks):
results = await asyncio.gather(*tasks, return_exceptions=True)
return [r if not isinstance(r, Exception) else None for r in results]
@staticmethod
def _normalize_tokens(text: str) -> set[str]:
"""Strip diacritics and non-alphanumeric chars, then tokenize."""
nfkd = unicodedata.normalize("NFKD", text.lower())
stripped = "".join(c for c in nfkd if not unicodedata.combining(c))
cleaned = re.sub(r"[^a-z0-9\s]", "", stripped)
return set(cleaned.split())
@staticmethod
def _tokens_match(query_tokens: set[str], title_tokens: set[str]) -> bool:
"""Check token overlap allowing prefix matching for partial queries."""
min_prefix = 2
if all(
any(qt == tt or (len(qt) >= min_prefix and tt.startswith(qt)) for tt in title_tokens)
for qt in query_tokens
):
return True
if all(
any(tt == qt or (len(tt) >= min_prefix and qt.startswith(tt)) for qt in query_tokens)
for tt in title_tokens
):
return True
return False
@staticmethod
def _detect_top_result(results: list[SearchResult], query: str) -> SearchResult | None:
if not results:
return None
best = results[0]
if best.score < TOP_RESULT_SCORE_THRESHOLD:
return None
query_tokens = SearchService._normalize_tokens(query)
title_tokens = SearchService._normalize_tokens(best.title)
if not query_tokens or not title_tokens:
return None
if SearchService._tokens_match(query_tokens, title_tokens):
return best
return None
async def _apply_audiodb_search_overlay(self, results: list[SearchResult]) -> None:
if self._audiodb_image_service is None:
return
tasks = []
task_indices = []
for i, item in enumerate(results):
if not item.musicbrainz_id:
continue
if item.type == "artist":
tasks.append(self._audiodb_image_service.get_cached_artist_images(item.musicbrainz_id))
task_indices.append(i)
elif item.type == "album":
tasks.append(self._audiodb_image_service.get_cached_album_images(item.musicbrainz_id))
task_indices.append(i)
if not tasks:
return
images_results = await asyncio.gather(*tasks, return_exceptions=True)
for idx, images in zip(task_indices, images_results):
item = results[idx]
if isinstance(images, Exception):
logger.warning("AudioDB search overlay failed for %s %s: %s", item.type, item.musicbrainz_id[:8], images)
continue
try:
if item.type == "artist":
if images and not images.is_negative:
if not item.thumb_url and images.thumb_url:
item.thumb_url = images.thumb_url
if not item.fanart_url and images.fanart_url:
item.fanart_url = images.fanart_url
if not item.banner_url and images.banner_url:
item.banner_url = images.banner_url
elif images is None and self._audiodb_browse_queue:
settings = self._preferences_service.get_advanced_settings()
if settings.audiodb_enabled:
await self._audiodb_browse_queue.enqueue(
"artist", item.musicbrainz_id, name=item.title,
)
elif item.type == "album":
if images and not images.is_negative:
if not item.album_thumb_url and images.album_thumb_url:
item.album_thumb_url = images.album_thumb_url
elif images is None and self._audiodb_browse_queue:
settings = self._preferences_service.get_advanced_settings()
if settings.audiodb_enabled:
await self._audiodb_browse_queue.enqueue(
"album", item.musicbrainz_id,
name=item.title,
artist_name=item.artist,
)
except Exception as e: # noqa: BLE001
logger.warning("AudioDB search overlay apply failed for %s %s: %s", item.type, item.musicbrainz_id[:8], e)
@deduplicate(lambda self, query, limit_artists=10, limit_albums=10, buckets=None: f"search:{query}:{limit_artists}:{limit_albums}:{buckets}")
async def search(
self,
query: str,
limit_artists: int = 10,
limit_albums: int = 10,
buckets: Optional[list[str]] = None
) -> SearchResponse:
cache_key = f"{query.strip().lower()}:{limit_artists}:{limit_albums}:{','.join(sorted(buckets)) if buckets else ''}"
now = time.monotonic()
cached = self._search_cache.get(cache_key)
if cached and (now - cached[0]) < SEARCH_CACHE_TTL:
return cached[1]
prefs = self._preferences_service.get_preferences()
included_secondary_types = set(t.lower() for t in prefs.secondary_types)
limits = {}
if not buckets or "artists" in buckets:
limits["artists"] = limit_artists
if not buckets or "albums" in buckets:
limits["albums"] = limit_albums
try:
grouped, library_mbids_raw, queue_items_raw, monitored_mbids_raw = await self._safe_gather(
self._mb_repo.search_grouped(
query,
limits=limits,
buckets=buckets,
included_secondary_types=included_secondary_types
),
self._lidarr_repo.get_library_mbids(include_release_ids=True),
self._lidarr_repo.get_queue(),
self._lidarr_repo.get_monitored_no_files_mbids(),
)
except Exception as e: # noqa: BLE001
logger.error(f"Search gather failed unexpectedly: {e}")
grouped, library_mbids_raw, queue_items_raw, monitored_mbids_raw = None, None, None, None
if grouped is None:
logger.warning("MusicBrainz search returned no results or failed")
grouped = grouped or {"artists": [], "albums": []}
library_mbids = library_mbids_raw or set()
if queue_items_raw:
queued_mbids = {item.musicbrainz_id.lower() for item in queue_items_raw if item.musicbrainz_id}
else:
queued_mbids = set()
monitored_mbids = monitored_mbids_raw or set()
for item in grouped.get("albums", []):
mbid_lower = (item.musicbrainz_id or "").lower()
item.in_library = mbid_lower in library_mbids
item.requested = mbid_lower in queued_mbids and not item.in_library
item.monitored = mbid_lower in monitored_mbids and not item.in_library and not item.requested
all_results = grouped.get("artists", []) + grouped.get("albums", [])
await self._apply_audiodb_search_overlay(all_results)
top_artist = self._detect_top_result(grouped.get("artists", []), query)
top_album = self._detect_top_result(grouped.get("albums", []), query)
response = SearchResponse(
artists=grouped.get("artists", []),
albums=grouped.get("albums", []),
top_artist=top_artist,
top_album=top_album,
)
self._search_cache[cache_key] = (now, response)
if len(self._search_cache) > SEARCH_CACHE_MAX_SIZE:
expired = [k for k, (ts, _) in self._search_cache.items() if (now - ts) >= SEARCH_CACHE_TTL]
for k in expired:
del self._search_cache[k]
if len(self._search_cache) > SEARCH_CACHE_MAX_SIZE:
oldest_key = min(self._search_cache, key=lambda k: self._search_cache[k][0])
del self._search_cache[oldest_key]
return response
def schedule_cover_prefetch(self, albums: list[SearchResult]) -> list[str]:
return [
item.musicbrainz_id
for item in albums[:COVER_PREFETCH_LIMIT]
if item.musicbrainz_id
]
@deduplicate(lambda self, bucket, query, limit=50, offset=0: f"search_bucket:{bucket}:{query}:{limit}:{offset}")
async def search_bucket(
self,
bucket: str,
query: str,
limit: int = 50,
offset: int = 0
) -> tuple[list[SearchResult], SearchResult | None]:
prefs = self._preferences_service.get_preferences()
included_secondary_types = set(t.lower() for t in prefs.secondary_types)
if bucket == "artists":
results = await self._mb_repo.search_artists(query, limit=limit, offset=offset)
elif bucket == "albums":
results = await self._mb_repo.search_albums(
query,
limit=limit,
offset=offset,
included_secondary_types=included_secondary_types
)
else:
return [], None
if bucket == "albums":
library_mbids_raw, queue_items_raw, monitored_mbids_raw = await self._safe_gather(
self._lidarr_repo.get_library_mbids(include_release_ids=True),
self._lidarr_repo.get_queue(),
self._lidarr_repo.get_monitored_no_files_mbids(),
)
library_mbids = library_mbids_raw or set()
if queue_items_raw:
queued_mbids = {item.musicbrainz_id.lower() for item in queue_items_raw if item.musicbrainz_id}
else:
queued_mbids = set()
monitored_mbids = monitored_mbids_raw or set()
for item in results:
mbid_lower = (item.musicbrainz_id or "").lower()
item.in_library = mbid_lower in library_mbids
item.requested = mbid_lower in queued_mbids and not item.in_library
item.monitored = mbid_lower in monitored_mbids and not item.in_library and not item.requested
await self._apply_audiodb_search_overlay(results)
top_result = self._detect_top_result(results, query) if offset == 0 else None
return results, top_result
@deduplicate(lambda self, query, limit=5: f"suggest:{query.strip().lower()}:{limit}")
async def suggest(self, query: str, limit: int = 5) -> SuggestResponse:
query = query.strip()
if len(query) < 2:
return SuggestResponse()
prefs = self._preferences_service.get_preferences()
included_secondary_types = set(t.lower() for t in prefs.secondary_types)
bucket_limit = ceil(limit * 0.6)
try:
grouped = await self._mb_repo.search_grouped(
query,
limits={"artists": bucket_limit, "albums": bucket_limit},
included_secondary_types=included_secondary_types,
)
except Exception as e: # noqa: BLE001
logger.warning("MusicBrainz suggest failed (query_len=%d): %s", len(query), type(e).__name__)
return SuggestResponse()
grouped = grouped or {"artists": [], "albums": []}
library_mbids_raw, queue_items_raw, monitored_mbids_raw = await self._safe_gather(
self._lidarr_repo.get_library_mbids(include_release_ids=True),
self._lidarr_repo.get_queue(),
self._lidarr_repo.get_monitored_no_files_mbids(),
)
library_mbids = library_mbids_raw or set()
if queue_items_raw:
queued_mbids = {item.musicbrainz_id.lower() for item in queue_items_raw if item.musicbrainz_id}
else:
queued_mbids = set()
monitored_mbids = monitored_mbids_raw or set()
for item in grouped.get("albums", []):
mbid_lower = (item.musicbrainz_id or "").lower()
item.in_library = mbid_lower in library_mbids
item.requested = mbid_lower in queued_mbids and not item.in_library
item.monitored = mbid_lower in monitored_mbids and not item.in_library and not item.requested
suggestions: list[SuggestResult] = []
for item in grouped.get("artists", []) + grouped.get("albums", []):
suggestions.append(SuggestResult(
type=item.type,
title=item.title,
artist=item.artist,
year=item.year,
musicbrainz_id=item.musicbrainz_id,
in_library=item.in_library,
requested=item.requested,
monitored=item.monitored,
disambiguation=item.disambiguation,
score=item.score,
))
type_order = {"artist": 0, "album": 1}
suggestions.sort(key=lambda s: (-s.score, type_order.get(s.type, 2), s.title.lower()))
return SuggestResponse(results=suggestions[:limit])