import asyncio import logging import re import time import unicodedata from math import ceil from typing import Optional, TYPE_CHECKING from api.v1.schemas.search import SearchResult, SearchResponse, SuggestResult, SuggestResponse from repositories.protocols import MusicBrainzRepositoryProtocol, LidarrRepositoryProtocol, CoverArtRepositoryProtocol from services.preferences_service import PreferencesService from infrastructure.http.deduplication import deduplicate if TYPE_CHECKING: from services.audiodb_image_service import AudioDBImageService from services.audiodb_browse_queue import AudioDBBrowseQueue logger = logging.getLogger(__name__) COVER_PREFETCH_LIMIT = 12 SEARCH_CACHE_TTL = 90 SEARCH_CACHE_MAX_SIZE = 200 TOP_RESULT_SCORE_THRESHOLD = 90 class SearchService: _search_cache: dict[str, tuple[float, SearchResponse]] = {} def __init__( self, mb_repo: MusicBrainzRepositoryProtocol, lidarr_repo: LidarrRepositoryProtocol, coverart_repo: CoverArtRepositoryProtocol, preferences_service: PreferencesService, audiodb_image_service: "AudioDBImageService | None" = None, audiodb_browse_queue: "AudioDBBrowseQueue | None" = None, ): self._mb_repo = mb_repo self._lidarr_repo = lidarr_repo self._coverart_repo = coverart_repo self._preferences_service = preferences_service self._audiodb_image_service = audiodb_image_service self._audiodb_browse_queue = audiodb_browse_queue async def _safe_gather(self, *tasks): results = await asyncio.gather(*tasks, return_exceptions=True) return [r if not isinstance(r, Exception) else None for r in results] @staticmethod def _normalize_tokens(text: str) -> set[str]: """Strip diacritics and non-alphanumeric chars, then tokenize.""" nfkd = unicodedata.normalize("NFKD", text.lower()) stripped = "".join(c for c in nfkd if not unicodedata.combining(c)) cleaned = re.sub(r"[^a-z0-9\s]", "", stripped) return set(cleaned.split()) @staticmethod def _tokens_match(query_tokens: set[str], title_tokens: set[str]) -> bool: """Check token overlap allowing prefix matching for partial queries.""" min_prefix = 2 if all( any(qt == tt or (len(qt) >= min_prefix and tt.startswith(qt)) for tt in title_tokens) for qt in query_tokens ): return True if all( any(tt == qt or (len(tt) >= min_prefix and qt.startswith(tt)) for qt in query_tokens) for tt in title_tokens ): return True return False @staticmethod def _detect_top_result(results: list[SearchResult], query: str) -> SearchResult | None: if not results: return None best = results[0] if best.score < TOP_RESULT_SCORE_THRESHOLD: return None query_tokens = SearchService._normalize_tokens(query) title_tokens = SearchService._normalize_tokens(best.title) if not query_tokens or not title_tokens: return None if SearchService._tokens_match(query_tokens, title_tokens): return best return None async def _apply_audiodb_search_overlay(self, results: list[SearchResult]) -> None: if self._audiodb_image_service is None: return tasks = [] task_indices = [] for i, item in enumerate(results): if not item.musicbrainz_id: continue if item.type == "artist": tasks.append(self._audiodb_image_service.get_cached_artist_images(item.musicbrainz_id)) task_indices.append(i) elif item.type == "album": tasks.append(self._audiodb_image_service.get_cached_album_images(item.musicbrainz_id)) task_indices.append(i) if not tasks: return images_results = await asyncio.gather(*tasks, return_exceptions=True) for idx, images in zip(task_indices, images_results): item = results[idx] if isinstance(images, Exception): logger.warning("AudioDB search overlay failed for %s %s: %s", item.type, item.musicbrainz_id[:8], images) continue try: if item.type == "artist": if images and not images.is_negative: if not item.thumb_url and images.thumb_url: item.thumb_url = images.thumb_url if not item.fanart_url and images.fanart_url: item.fanart_url = images.fanart_url if not item.banner_url and images.banner_url: item.banner_url = images.banner_url elif images is None and self._audiodb_browse_queue: settings = self._preferences_service.get_advanced_settings() if settings.audiodb_enabled: await self._audiodb_browse_queue.enqueue( "artist", item.musicbrainz_id, name=item.title, ) elif item.type == "album": if images and not images.is_negative: if not item.album_thumb_url and images.album_thumb_url: item.album_thumb_url = images.album_thumb_url elif images is None and self._audiodb_browse_queue: settings = self._preferences_service.get_advanced_settings() if settings.audiodb_enabled: await self._audiodb_browse_queue.enqueue( "album", item.musicbrainz_id, name=item.title, artist_name=item.artist, ) except Exception as e: # noqa: BLE001 logger.warning("AudioDB search overlay apply failed for %s %s: %s", item.type, item.musicbrainz_id[:8], e) @deduplicate(lambda self, query, limit_artists=10, limit_albums=10, buckets=None: f"search:{query}:{limit_artists}:{limit_albums}:{buckets}") async def search( self, query: str, limit_artists: int = 10, limit_albums: int = 10, buckets: Optional[list[str]] = None ) -> SearchResponse: cache_key = f"{query.strip().lower()}:{limit_artists}:{limit_albums}:{','.join(sorted(buckets)) if buckets else ''}" now = time.monotonic() cached = self._search_cache.get(cache_key) if cached and (now - cached[0]) < SEARCH_CACHE_TTL: return cached[1] prefs = self._preferences_service.get_preferences() included_secondary_types = set(t.lower() for t in prefs.secondary_types) limits = {} if not buckets or "artists" in buckets: limits["artists"] = limit_artists if not buckets or "albums" in buckets: limits["albums"] = limit_albums try: grouped, library_mbids_raw, queue_items_raw = await self._safe_gather( self._mb_repo.search_grouped( query, limits=limits, buckets=buckets, included_secondary_types=included_secondary_types ), self._lidarr_repo.get_library_mbids(include_release_ids=True), self._lidarr_repo.get_queue(), ) except Exception as e: # noqa: BLE001 logger.error(f"Search gather failed unexpectedly: {e}") grouped, library_mbids_raw, queue_items_raw = None, None, None if grouped is None: logger.warning("MusicBrainz search returned no results or failed") grouped = grouped or {"artists": [], "albums": []} library_mbids = library_mbids_raw or set() if queue_items_raw: queued_mbids = {item.musicbrainz_id.lower() for item in queue_items_raw if item.musicbrainz_id} else: queued_mbids = set() for item in grouped.get("albums", []): mbid_lower = (item.musicbrainz_id or "").lower() item.in_library = mbid_lower in library_mbids item.requested = mbid_lower in queued_mbids and not item.in_library all_results = grouped.get("artists", []) + grouped.get("albums", []) await self._apply_audiodb_search_overlay(all_results) top_artist = self._detect_top_result(grouped.get("artists", []), query) top_album = self._detect_top_result(grouped.get("albums", []), query) response = SearchResponse( artists=grouped.get("artists", []), albums=grouped.get("albums", []), top_artist=top_artist, top_album=top_album, ) self._search_cache[cache_key] = (now, response) if len(self._search_cache) > SEARCH_CACHE_MAX_SIZE: expired = [k for k, (ts, _) in self._search_cache.items() if (now - ts) >= SEARCH_CACHE_TTL] for k in expired: del self._search_cache[k] if len(self._search_cache) > SEARCH_CACHE_MAX_SIZE: oldest_key = min(self._search_cache, key=lambda k: self._search_cache[k][0]) del self._search_cache[oldest_key] return response def schedule_cover_prefetch(self, albums: list[SearchResult]) -> list[str]: return [ item.musicbrainz_id for item in albums[:COVER_PREFETCH_LIMIT] if item.musicbrainz_id ] @deduplicate(lambda self, bucket, query, limit=50, offset=0: f"search_bucket:{bucket}:{query}:{limit}:{offset}") async def search_bucket( self, bucket: str, query: str, limit: int = 50, offset: int = 0 ) -> tuple[list[SearchResult], SearchResult | None]: prefs = self._preferences_service.get_preferences() included_secondary_types = set(t.lower() for t in prefs.secondary_types) if bucket == "artists": results = await self._mb_repo.search_artists(query, limit=limit, offset=offset) elif bucket == "albums": results = await self._mb_repo.search_albums( query, limit=limit, offset=offset, included_secondary_types=included_secondary_types ) else: return [], None if bucket == "albums": library_mbids_raw, queue_items_raw = await self._safe_gather( self._lidarr_repo.get_library_mbids(include_release_ids=True), self._lidarr_repo.get_queue(), ) library_mbids = library_mbids_raw or set() if queue_items_raw: queued_mbids = {item.musicbrainz_id.lower() for item in queue_items_raw if item.musicbrainz_id} else: queued_mbids = set() for item in results: mbid_lower = (item.musicbrainz_id or "").lower() item.in_library = mbid_lower in library_mbids item.requested = mbid_lower in queued_mbids and not item.in_library await self._apply_audiodb_search_overlay(results) top_result = self._detect_top_result(results, query) if offset == 0 else None return results, top_result @deduplicate(lambda self, query, limit=5: f"suggest:{query.strip().lower()}:{limit}") async def suggest(self, query: str, limit: int = 5) -> SuggestResponse: query = query.strip() if len(query) < 2: return SuggestResponse() prefs = self._preferences_service.get_preferences() included_secondary_types = set(t.lower() for t in prefs.secondary_types) bucket_limit = ceil(limit * 0.6) try: grouped = await self._mb_repo.search_grouped( query, limits={"artists": bucket_limit, "albums": bucket_limit}, included_secondary_types=included_secondary_types, ) except Exception as e: # noqa: BLE001 logger.warning("MusicBrainz suggest failed (query_len=%d): %s", len(query), type(e).__name__) return SuggestResponse() grouped = grouped or {"artists": [], "albums": []} library_mbids_raw, queue_items_raw = await self._safe_gather( self._lidarr_repo.get_library_mbids(include_release_ids=True), self._lidarr_repo.get_queue(), ) library_mbids = library_mbids_raw or set() if queue_items_raw: queued_mbids = {item.musicbrainz_id.lower() for item in queue_items_raw if item.musicbrainz_id} else: queued_mbids = set() for item in grouped.get("albums", []): mbid_lower = (item.musicbrainz_id or "").lower() item.in_library = mbid_lower in library_mbids item.requested = mbid_lower in queued_mbids and not item.in_library suggestions: list[SuggestResult] = [] for item in grouped.get("artists", []) + grouped.get("albums", []): suggestions.append(SuggestResult( type=item.type, title=item.title, artist=item.artist, year=item.year, musicbrainz_id=item.musicbrainz_id, in_library=item.in_library, requested=item.requested, disambiguation=item.disambiguation, score=item.score, )) type_order = {"artist": 0, "album": 1} suggestions.sort(key=lambda s: (-s.score, type_order.get(s.type, 2), s.title.lower())) return SuggestResponse(results=suggestions[:limit])