#!/usr/bin/env python3
# Builds a list of upcoming anime movie records by matching AniList movie
# candidates against TMDB search results and TMDB release-date data.
from __future__ import annotations

from datetime import date, timedelta
from difflib import SequenceMatcher

from app_config import get_settings
from data_sources.anilist_source import fetch_anilist_movie_candidates
from data_sources.animeschedule_source import fetch_animeschedule_anime_by_title
from data_sources.tmdb_source import (
    extract_de_theatrical_dates,
    fetch_tmdb_release_dates,
    search_tmdb_movies,
    select_release_in_range,
)

# Minimum SequenceMatcher ratio a TMDB candidate needs when it has no exact
# title match (see pick_best_tmdb_match).
FUZZY_MATCH_THRESHOLD = 0.65
# Minimum "match_score" of an AnimeSchedule lookup before its English title is
# trusted (see resolve_titles).
SCHEDULE_MATCH_THRESHOLD = 2


def _clean_text(entry: dict, key: str) -> str:
    """Return ``entry[key]`` stripped of whitespace, or "" if missing/falsy."""
    return (entry.get(key) or "").strip()


def add_months(value: date, months: int) -> date:
    """Return the first day of the month lying ``months`` months after ``value``.

    The day of ``value`` is ignored; the result always has ``day == 1``.
    """
    month_index = value.month - 1 + months
    return date(value.year + month_index // 12, month_index % 12 + 1, 1)


def normalize_title(text: str) -> str:
    """Lowercase ``text``, replace non-alphanumerics with spaces, collapse spaces.

    A falsy ``text`` (``None`` or "") yields "".
    """
    cleaned = "".join(ch.lower() if ch.isalnum() else " " for ch in (text or ""))
    return " ".join(cleaned.split())


def fuzzy_ratio(left: str, right: str) -> float:
    """Return the SequenceMatcher ratio of the two normalized titles.

    Returns 0.0 when either title is empty after normalization.
    """
    a = normalize_title(left)
    b = normalize_title(right)
    if not a or not b:
        return 0.0
    return SequenceMatcher(None, a, b).ratio()


def format_date(value: date, locale: str) -> str:
    """Format ``value`` as ``DD.MM.YYYY`` for "de-DE", ISO 8601 otherwise."""
    if locale == "de-DE":
        return f"{value.day:02d}.{value.month:02d}.{value.year}"
    return value.isoformat()


def parse_record_release_date(record: dict) -> date:
    """Parse ``record["releaseDate"]`` as an ISO date.

    Returns ``date.max`` when the value is missing or not a valid ISO date, so
    such records sort last.
    """
    text = str(record.get("releaseDate") or "").strip()
    try:
        return date.fromisoformat(text)
    except ValueError:
        return date.max


def is_year_match(anilist_year: int, tmdb_year: int) -> bool:
    """Return True when both years are positive and differ by at most one."""
    if anilist_year <= 0 or tmdb_year <= 0:
        return False
    return abs(anilist_year - tmdb_year) <= 1


def title_score(
    anilist_english: str,
    anilist_romaji: str,
    anilist_native: str,
    tmdb_title: str,
    tmdb_original: str,
) -> tuple[int, int, int, float]:
    """Score how well the AniList titles match the TMDB titles.

    Returns:
        ``(exact_english, exact_romaji, exact_native, best_ratio)`` where each
        ``exact_*`` is 1 if that non-empty normalized AniList title equals one
        of the normalized TMDB titles (else 0), and ``best_ratio`` is the
        highest ``fuzzy_ratio`` over all AniList/TMDB title pairs.
    """
    tmdb_options = [normalize_title(tmdb_title), normalize_title(tmdb_original)]

    def exact_flag(title: str) -> int:
        normalized = normalize_title(title)
        return 1 if normalized and normalized in tmdb_options else 0

    anilist_titles = (anilist_english, anilist_romaji, anilist_native)
    best_ratio = 0.0
    for option in (tmdb_title, tmdb_original):
        for anilist_title in anilist_titles:
            best_ratio = max(best_ratio, fuzzy_ratio(anilist_title, option))

    return (
        exact_flag(anilist_english),
        exact_flag(anilist_romaji),
        exact_flag(anilist_native),
        best_ratio,
    )


def collect_tmdb_candidates_for_anilist(
    anilist_entry: dict,
    tmdb_token: str,
    search_cache: dict[str, list[dict]],
) -> list[dict]:
    """Search TMDB for every distinct title of ``anilist_entry``.

    Each distinct title (English, romaji, native; compared after
    normalization) is searched in "de-DE", "en-US" and "ja-JP". Search results
    are memoized in ``search_cache`` under ``"<language>:<normalized query>"``,
    which is mutated in place.

    Returns:
        The candidates de-duplicated by positive ``tmdb_id``; an empty list if
        the entry has no titles.
    """
    english = _clean_text(anilist_entry, "title_english")
    romaji = _clean_text(anilist_entry, "title_romaji")
    native = _clean_text(anilist_entry, "title_native")

    queries = []
    if english:
        queries.append(english)
    if romaji and normalize_title(romaji) != normalize_title(english):
        queries.append(romaji)
    if native and normalize_title(native) not in {
        normalize_title(english),
        normalize_title(romaji),
    }:
        queries.append(native)

    if not queries:
        return []

    candidates_by_id: dict[int, dict] = {}
    languages = ["de-DE", "en-US", "ja-JP"]

    for query in queries:
        normalized_query = normalize_title(query)
        for language in languages:
            cache_key = f"{language}:{normalized_query}"
            if cache_key not in search_cache:
                search_cache[cache_key] = search_tmdb_movies(
                    query, tmdb_token, language=language
                )

            for item in search_cache[cache_key]:
                tmdb_id = int(item.get("tmdb_id") or 0)
                if tmdb_id <= 0:
                    continue

                existing = candidates_by_id.get(tmdb_id)
                if not existing:
                    candidates_by_id[tmdb_id] = item
                    continue

                # Prefer candidate carrying a release date if duplicate appears
                # from different language queries.
                if not existing.get("release_date") and item.get("release_date"):
                    candidates_by_id[tmdb_id] = item

    return list(candidates_by_id.values())


def pick_best_tmdb_match(anilist_entry: dict, tmdb_candidates: list[dict]) -> dict | None:
    """Pick the TMDB candidate whose titles best match ``anilist_entry``.

    A candidate without any exact title match is only considered when its
    release year is within one year of the AniList start year and its fuzzy
    ratio reaches ``FUZZY_MATCH_THRESHOLD``. Among the remaining candidates the
    one with the greatest ``title_score`` tuple wins; on ties the earliest
    candidate is kept.

    Returns:
        The best candidate dict, or ``None`` if none qualifies.
    """
    english = _clean_text(anilist_entry, "title_english")
    romaji = _clean_text(anilist_entry, "title_romaji")
    native = _clean_text(anilist_entry, "title_native")
    anilist_year = int(anilist_entry.get("start_year") or 0)

    best = None
    best_tuple = (-1, -1, -1, 0.0)

    for candidate in tmdb_candidates:
        score_tuple = title_score(
            english,
            romaji,
            native,
            str(candidate.get("title") or ""),
            str(candidate.get("original_title") or ""),
        )
        exact_english, exact_romaji, exact_native, ratio = score_tuple

        # Only read .year from real date objects; anything else counts as
        # "year unknown" instead of raising AttributeError.
        tmdb_release = candidate.get("release_date")
        tmdb_year = tmdb_release.year if isinstance(tmdb_release, date) else 0

        has_exact_match = bool(exact_english or exact_romaji or exact_native)
        if not has_exact_match:
            if not is_year_match(anilist_year, tmdb_year):
                continue
            if ratio < FUZZY_MATCH_THRESHOLD:
                continue

        if score_tuple > best_tuple:
            best_tuple = score_tuple
            best = candidate

    return best


def resolve_titles(
    anilist_entry: dict,
    schedule_token: str,
    schedule_cache: dict[str, dict],
) -> tuple[str, str]:
    """Determine the display title for ``anilist_entry``.

    When AniList has no English title but has a romaji one, AnimeSchedule is
    queried by the romaji title and its English title is used if the lookup's
    ``match_score`` reaches ``SCHEDULE_MATCH_THRESHOLD``.

    Returns:
        ``(preferred_title, schedule_english)``; the preferred title falls back
        through schedule English, AniList English, romaji, native and may be "".
    """
    english = _clean_text(anilist_entry, "title_english")
    romaji = _clean_text(anilist_entry, "title_romaji")
    native = _clean_text(anilist_entry, "title_native")

    schedule_english = ""
    if not english and romaji:
        # Tolerate a None/empty lookup result instead of failing on .get().
        schedule = (
            fetch_animeschedule_anime_by_title(romaji, schedule_token, schedule_cache)
            or {}
        )
        if int(schedule.get("match_score") or 0) >= SCHEDULE_MATCH_THRESHOLD:
            schedule_english = (schedule.get("english_title") or "").strip()

    preferred_title = schedule_english or english or romaji or native
    return preferred_title, schedule_english


def build_record(
    anilist_entry: dict,
    tmdb_entry: dict,
    release_date: date,
    locale: str,
    title: str,
    schedule_english: str,
) -> dict:
    """Assemble the output record for one matched movie.

    Text fields are stripped; missing values become "" except ``genres`` and
    ``anilist_url`` (default "n/a") and ``format`` (default "MOVIE"). The
    release date is stored both locale-formatted (``release``) and as ISO
    (``releaseDate``).
    """
    english = _clean_text(anilist_entry, "title_english")
    romaji = _clean_text(anilist_entry, "title_romaji")
    tmdb_id = int(tmdb_entry.get("tmdb_id") or 0)

    return {
        "title": title,
        "title_english_anilist": english,
        "title_anilist": english or romaji,
        "title_schedule_english": schedule_english,
        "title_romaji": romaji,
        "title_native": _clean_text(anilist_entry, "title_native"),
        "studio": _clean_text(anilist_entry, "studio_text"),
        "genres": _clean_text(anilist_entry, "genres_text") or "n/a",
        "tags": _clean_text(anilist_entry, "tags_text"),
        "release": format_date(release_date, locale),
        "releaseDate": release_date.isoformat(),
        "anilist_url": _clean_text(anilist_entry, "anilist_url") or "n/a",
        "format": _clean_text(anilist_entry, "format") or "MOVIE",
        "cover_image": _clean_text(anilist_entry, "cover_image"),
        "description": _clean_text(anilist_entry, "description"),
        "tmdb_title": _clean_text(tmdb_entry, "title"),
        "tmdb_id": tmdb_id,
        "ids": {
            "anilist": int(anilist_entry.get("anilist_id") or 0),
            "tmdb": tmdb_id,
        },
    }


def sort_dedup_records(records: list[dict]) -> list[dict]:
    """Drop duplicate records and sort the rest by release date, then title.

    Two records are duplicates when their AniList id, TMDB id and
    ``releaseDate`` string all agree; the first occurrence is kept.
    """
    unique: dict[tuple[int, int, str], dict] = {}
    for record in records:
        ids = record.get("ids") or {}
        key = (
            int(ids.get("anilist") or 0),
            int(ids.get("tmdb") or 0),
            str(record.get("releaseDate") or ""),
        )
        unique.setdefault(key, record)

    return sorted(
        unique.values(),
        key=lambda item: (parse_record_release_date(item), str(item.get("title") or "")),
    )


def get_upcoming_movie_records(
    locale: str,
    today: date | None = None,
    anime_schedule_token: str | None = None,
    tmdb_read_access_token: str | None = None,
) -> list[dict]:
    """Return records for anime movies releasing in the upcoming window.

    The window runs from ``today`` (default: ``date.today()``) through the last
    day of the following calendar month. Explicit tokens take precedence over
    the ones from ``get_settings()``.

    Args:
        locale: Locale passed to ``format_date`` for the ``release`` field.
        today: Reference day; defaults to the current date.
        anime_schedule_token: AnimeSchedule API token override.
        tmdb_read_access_token: TMDB read access token override.

    Returns:
        De-duplicated records sorted by release date and title; an empty list
        when no TMDB token is available.
    """
    settings = get_settings()
    # "or ''" keeps .strip() safe when neither the argument nor the settings
    # provide a token.
    schedule_token = (
        anime_schedule_token or settings.animeschedule_api_token or ""
    ).strip()
    tmdb_token = (
        tmdb_read_access_token or settings.tmdb_read_access_token or ""
    ).strip()
    if not tmdb_token:
        return []

    current_day = today or date.today()
    month_start = date(current_day.year, current_day.month, 1)
    # First day of the month after next, minus one day = end of next month.
    end_date = add_months(month_start, 2) - timedelta(days=1)

    anilist_candidates = fetch_anilist_movie_candidates(current_day)
    schedule_cache: dict[str, dict] = {}
    tmdb_search_cache: dict[str, list[dict]] = {}
    tmdb_release_cache: dict[int, dict] = {}
    output_records = []

    for anilist_entry in anilist_candidates:
        tmdb_candidates = collect_tmdb_candidates_for_anilist(
            anilist_entry, tmdb_token, tmdb_search_cache
        )
        if not tmdb_candidates:
            continue

        matched_tmdb = pick_best_tmdb_match(anilist_entry, tmdb_candidates)
        if not matched_tmdb:
            continue

        tmdb_id = int(matched_tmdb.get("tmdb_id") or 0)
        if tmdb_id <= 0:
            continue

        if tmdb_id not in tmdb_release_cache:
            tmdb_release_cache[tmdb_id] = fetch_tmdb_release_dates(tmdb_id, tmdb_token)

        release_payload = tmdb_release_cache[tmdb_id]
        de_theatrical_dates = extract_de_theatrical_dates(release_payload)
        release_date = select_release_in_range(de_theatrical_dates, current_day, end_date)

        if not release_date:
            # Fall back to the release date from the TMDB search result, but
            # only if it is a real date inside the window.
            candidate_release = matched_tmdb.get("release_date")
            if not (
                isinstance(candidate_release, date)
                and current_day <= candidate_release <= end_date
            ):
                continue
            release_date = candidate_release

        preferred_title, schedule_english = resolve_titles(
            anilist_entry, schedule_token, schedule_cache
        )
        if not preferred_title:
            continue

        output_records.append(
            build_record(
                anilist_entry=anilist_entry,
                tmdb_entry=matched_tmdb,
                release_date=release_date,
                locale=locale,
                title=preferred_title,
                schedule_english=schedule_english,
            )
        )

    return sort_dedup_records(output_records)