Add modular TMDb-first movie pipeline and Discord bot
This commit is contained in:
163
movie_pipeline.py
Normal file
163
movie_pipeline.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, timedelta
|
||||
|
||||
from app_config import get_settings
|
||||
from data_sources.anilist_source import fetch_anilist_movie_by_search
|
||||
from data_sources.animeschedule_source import fetch_animeschedule_anime_by_title
|
||||
from data_sources.tmdb_source import fetch_tmdb_anime_movies
|
||||
|
||||
|
||||
def add_months(d: date, months: int) -> date:
    """Return the first day of the month lying ``months`` months after ``d``.

    The day-of-month of ``d`` is discarded: the result is always day 1.
    ``months`` may be negative to step backwards.
    """
    # Use a zero-based month index so the year carry and the month wrap
    # both fall out of a single floor division.
    year_shift, month_index = divmod(d.month - 1 + months, 12)
    return date(d.year + year_shift, month_index + 1, 1)
|
||||
|
||||
|
||||
def format_date(d: date, locale: str) -> str:
    """Render ``d`` as text for the given locale.

    For ``"de-DE"`` the result is zero-padded day and month followed by the
    year, separated by dots (e.g. ``05.03.2024``). Any other locale value
    falls back to ``date.isoformat()``.
    """
    if locale != "de-DE":
        return d.isoformat()
    return "{:02d}.{:02d}.{}".format(d.day, d.month, d.year)
|
||||
|
||||
|
||||
def select_title(anilist: dict, schedule: dict, tmdb: dict) -> str:
    """Pick the display title from the three source records.

    Candidates are tried in this order and the first one that is non-empty
    after stripping whitespace wins:

    1. ``schedule["english_title"]``
    2. ``anilist["title_english"]``
    3. ``anilist["title_best"]``
    4. ``tmdb["title"]``
    5. ``tmdb["original_title"]``

    Returns an empty string when every candidate is missing or blank.
    """
    priority = (
        (schedule, "english_title"),
        (anilist, "title_english"),
        (anilist, "title_best"),
        (tmdb, "title"),
        (tmdb, "original_title"),
    )
    chosen = ""
    # Candidates are looked up lazily, one at a time, and the scan stops at
    # the first usable title.
    for source, key in priority:
        chosen = (source.get(key) or "").strip()
        if chosen:
            break
    return chosen
|
||||
|
||||
|
||||
def structure_movie_record(tmdb_movie: dict, anilist: dict, schedule: dict, locale: str) -> dict:
    """Merge one TMDb movie with its AniList and AnimeSchedule data into a flat record.

    Args:
        tmdb_movie: TMDb entry; ``release_date`` is formatted only when it is
            a ``date`` instance, otherwise the release is reported as ``"n/a"``.
        anilist: AniList entry, or an empty dict when nothing was found.
        schedule: AnimeSchedule entry, or an empty dict when nothing was found.
        locale: Locale string passed through to ``format_date``.

    Returns:
        A dict of display-ready fields, a ``source_presence`` map, and the
        three raw input dicts under ``"tmdb"``, ``"anilist"`` and
        ``"animeschedule"``. Most missing text fields become ``"n/a"``.
    """

    def text(source: dict, key: str) -> str:
        # Treat a missing/None/empty value as "" and trim surrounding whitespace.
        return (source.get(key) or "").strip()

    raw_release = tmdb_movie.get("release_date")
    if isinstance(raw_release, date):
        release_text = format_date(raw_release, locale)
    else:
        release_text = "n/a"

    anilist_found = bool(anilist)
    # The schedule only counts as present when it carries at least one
    # identifying field (title, English title or URL).
    schedule_found = bool(schedule) and any(
        schedule.get(key) for key in ("title", "english_title", "url")
    )

    # Description priority: TMDb overview, then AniList, then AnimeSchedule.
    description = (
        text(tmdb_movie, "overview")
        or text(anilist, "description")
        or text(schedule, "description")
        or "n/a"
    )

    # User requirement: cover images should come from AniList.
    cover = text(anilist, "cover_image")

    # Genre priority mirrors the description priority.
    genres = (
        text(tmdb_movie, "genres_text")
        or text(anilist, "genres_text")
        or text(schedule, "genres")
        or "n/a"
    )

    return {
        "title": select_title(anilist, schedule, tmdb_movie),
        "title_english_anilist": text(anilist, "title_english"),
        "title_anilist": text(anilist, "title_best"),
        "title_schedule_english": text(schedule, "english_title"),
        "title_romaji": text(anilist, "title_romaji") or "n/a",
        "title_native": text(anilist, "title_native") or "n/a",
        "studio": text(anilist, "studio_text") or text(schedule, "studios") or "n/a",
        "genres": genres,
        "tags": text(anilist, "tags_text") or "n/a",
        "release": release_text,
        "anilist_url": text(anilist, "anilist_url") or "n/a",
        "format": text(anilist, "format") or text(schedule, "format") or "MOVIE",
        # Episodes and duration are passed through as-is (not stripped).
        "episodes": anilist.get("episodes") or "n/a",
        "duration": anilist.get("duration") or "n/a",
        "source": text(anilist, "source") or "n/a",
        "cover_image": cover,
        "description": description,
        "schedule_url": text(schedule, "url"),
        "schedule_title": text(schedule, "title"),
        "release_source": "TMDb DE",
        "source_presence": {
            "tmdb": True,
            "anilist": anilist_found,
            "animeschedule": schedule_found,
        },
        "tmdb": tmdb_movie,
        "anilist": anilist,
        "animeschedule": schedule,
    }
|
||||
|
||||
|
||||
def sort_and_structure_movies(tmdb_movies: list[dict], locale: str, schedule_token: str) -> list[dict]:
    """Enrich TMDb movies with AniList and AnimeSchedule data, newest release first.

    Movies are ordered by ``release_date`` descending; entries without a
    release date sort as ``date.min`` and therefore end up last. For each
    movie AniList is searched by TMDb title and then original title, and
    AnimeSchedule is queried with every distinct title known so far, keeping
    the result with the highest ``match_score``.

    Args:
        tmdb_movies: TMDb movie dicts.
        locale: Locale string forwarded to ``structure_movie_record``.
        schedule_token: Token forwarded to the AnimeSchedule lookup.

    Returns:
        One structured record per input movie, in sorted order.
    """
    # Per-call caches handed to the fetch helpers.
    anilist_cache: dict[str, dict | None] = {}
    schedule_cache: dict[str, dict] = {}

    def release_key(movie: dict):
        return movie.get("release_date") or date.min

    ordered = list(tmdb_movies)
    ordered.sort(key=release_key, reverse=True)

    structured = []
    for movie in ordered:
        tmdb_title = (movie.get("title") or "").strip()
        tmdb_original = (movie.get("original_title") or "").strip()

        # AniList: take the first query that yields a hit.
        anilist_hit = None
        for query in (tmdb_title, tmdb_original):
            if query:
                anilist_hit = fetch_anilist_movie_by_search(query, anilist_cache)
                if anilist_hit:
                    break
        if not anilist_hit:
            anilist_hit = {}

        lookup_titles = [
            (anilist_hit.get("title_english") or "").strip(),
            (anilist_hit.get("title_best") or "").strip(),
            tmdb_title,
            tmdb_original,
        ]

        top_match = {}
        top_score = 0
        tried = set()
        for lookup in lookup_titles:
            if not lookup:
                continue
            # De-duplicate case-insensitively so each title is queried once.
            lowered = lookup.lower()
            if lowered in tried:
                continue
            tried.add(lowered)

            result = fetch_animeschedule_anime_by_title(lookup, schedule_token, schedule_cache)
            score = int(result.get("match_score") or 0)
            if score > top_score:
                top_match, top_score = result, score

            # Prefer results that have explicit English title when scores are tied.
            if score == top_score and score > 0:
                current_english = (top_match.get("english_title") or "").strip()
                if not current_english and (result.get("english_title") or "").strip():
                    top_match = result

        # Only a positive score counts as a usable schedule match.
        schedule = top_match if top_score > 0 else {}

        structured.append(structure_movie_record(movie, anilist_hit, schedule, locale))

    return structured
|
||||
|
||||
|
||||
def get_upcoming_movie_records(
    locale: str,
    today: date | None = None,
    anime_schedule_token: str | None = None,
    tmdb_read_access_token: str | None = None,
) -> list[dict]:
    """Fetch and structure movies releasing from today through the end of next month.

    Args:
        locale: Locale string; passed to TMDb as ``language`` and used when
            structuring the records.
        today: Start of the window; defaults to ``date.today()``.
        anime_schedule_token: Overrides ``settings.animeschedule_api_token``
            when given and non-empty.
        tmdb_read_access_token: Overrides ``settings.tmdb_read_access_token``
            when given and non-empty.

    Returns:
        The structured records produced by ``sort_and_structure_movies``.
    """
    settings = get_settings()

    # Explicit arguments win; otherwise fall back to the configured tokens.
    raw_schedule_token = anime_schedule_token or settings.animeschedule_api_token
    schedule_token = raw_schedule_token.strip()
    raw_tmdb_token = tmdb_read_access_token or settings.tmdb_read_access_token
    tmdb_token = raw_tmdb_token.strip()

    window_start = today if today else date.today()
    first_of_month = window_start.replace(day=1)
    # The first day two months ahead, minus one day, is the last day of
    # next month.
    first_after_window = add_months(first_of_month, 2)
    window_end = first_after_window - timedelta(days=1)

    return sort_and_structure_movies(
        fetch_tmdb_anime_movies(window_start, window_end, tmdb_token, language=locale),
        locale,
        schedule_token,
    )
|
||||
Reference in New Issue
Block a user