Add modular TMDb-first movie pipeline and Discord bot

This commit is contained in:
ProgrammGamer
2026-04-21 21:43:35 +02:00
commit bbba110268
13 changed files with 1283 additions and 0 deletions

118
data_sources/tmdb_source.py Normal file
View File

@@ -0,0 +1,118 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
import urllib.parse
import urllib.request
from datetime import date
# Root URL that every TMDb request path in this module is appended to.
TMDB_API_BASE = "https://api.themoviedb.org/3"
# Keyword id sent as ``with_keywords`` to /discover/movie. Kept as a string
# because it goes straight into the query string. Presumably TMDb's "anime"
# keyword -- TODO confirm against the TMDb keyword list.
TMDB_ANIME_KEYWORD_ID = "210024"
def parse_release_date(value: str) -> date | None:
    """Parse the leading ISO date of *value* into a ``date``.

    Surrounding whitespace is ignored and only the first ten characters are
    handed to ``date.fromisoformat``, so a trailing time component does not
    matter.

    Returns:
        The parsed date, or ``None`` when *value* is empty/``None`` or does
        not start with a valid ISO date.
    """
    cleaned = (value or "").strip()
    parsed = None
    if cleaned:
        try:
            parsed = date.fromisoformat(cleaned[:10])
        except ValueError:
            # Not a valid ISO date: report "no date" instead of raising.
            parsed = None
    return parsed
def tmdb_get_json(path: str, params: dict, token: str) -> dict:
    """Perform an authenticated GET against the TMDb API and decode the JSON.

    Args:
        path: Endpoint path appended to ``TMDB_API_BASE`` (e.g. ``"/discover/movie"``).
        params: Query parameters; URL-encoded and appended after ``?``.
        token: Value sent in the ``Authorization: Bearer`` header.

    Returns:
        The response body parsed with ``json.loads`` after UTF-8 decoding.

    Nothing is caught here: errors raised by ``urlopen`` (including the
    20-second timeout) or by JSON/UTF-8 decoding propagate to the caller.
    """
    query_string = urllib.parse.urlencode(params)
    request_headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {token}",
        "User-Agent": "anime-movies-script/1.0",
    }
    request = urllib.request.Request(
        TMDB_API_BASE + path + "?" + query_string,
        headers=request_headers,
    )
    with urllib.request.urlopen(request, timeout=20) as response:
        body = response.read()
    return json.loads(body.decode("utf-8"))
def poster_url(path: str) -> str:
    """Build the full w780 poster image URL for a TMDb poster path.

    The path is stripped of surrounding whitespace and appended verbatim to
    the image base URL. An empty, blank or ``None`` path yields ``""``.
    """
    image_base = "https://image.tmdb.org/t/p/w780"
    trimmed = (path or "").strip()
    return image_base + trimmed if trimmed else ""
def fetch_genre_map(token: str, language: str = "de-DE") -> dict[int, str]:
    """Fetch TMDb's movie genre list and return it as an ``id -> name`` map.

    Args:
        token: Bearer token forwarded to ``tmdb_get_json``.
        language: Locale sent as the ``language`` query parameter.

    Returns:
        Mapping of integer genre id to its stripped, non-empty name. Entries
        whose id cannot be converted to ``int`` or whose name is empty are
        skipped. A missing or ``null`` ``genres`` field gives an empty map.

    Errors raised by ``tmdb_get_json`` propagate unchanged.
    """
    payload = tmdb_get_json("/genre/movie/list", {"language": language}, token)
    genre_map: dict[int, str] = {}
    # ``or []`` (rather than a ``.get`` default) also covers an explicit
    # ``"genres": null``, which would otherwise make the loop raise TypeError.
    for entry in payload.get("genres") or []:
        try:
            genre_id = int(entry.get("id"))
        except (AttributeError, TypeError, ValueError, OverflowError):
            # Entry is not a mapping, or its id is missing / not integer-like.
            continue
        name = (entry.get("name") or "").strip()
        if name:
            genre_map[genre_id] = name
    return genre_map
# TMDb rejects list requests for pages beyond 500 with an HTTP error, even
# when ``total_pages`` in the response reports a larger number.
_TMDB_MAX_PAGE = 500


def _build_movie_record(movie: dict, release_date: date, genre_map: dict[int, str]) -> dict:
    """Convert one raw /discover/movie entry into this module's movie dict."""
    genre_names = [genre_map.get(gid, "") for gid in (movie.get("genre_ids") or [])]
    # Drop ids that are unknown to the genre map.
    genre_names = [name for name in genre_names if name]
    return {
        "tmdb_id": movie.get("id"),
        "title": (movie.get("title") or "").strip(),
        "original_title": (movie.get("original_title") or "").strip(),
        "overview": (movie.get("overview") or "").strip(),
        "release_date": release_date,
        "genres": genre_names,
        "genres_text": ", ".join(genre_names),
        "poster_url": poster_url(str(movie.get("poster_path") or "")),
        "popularity": movie.get("popularity"),
        "vote_average": movie.get("vote_average"),
        "vote_count": movie.get("vote_count"),
        "raw": movie,
    }


def fetch_tmdb_anime_movies(start_date: date, end_date: date, token: str, language: str = "de-DE") -> list[dict]:
    """Collect movies tagged with ``TMDB_ANIME_KEYWORD_ID`` in a date range.

    Pages through /discover/movie, filtered by primary release date between
    *start_date* and *end_date* (both sent as ISO dates) and requested in
    descending primary-release-date order.

    Args:
        start_date: Lower bound for ``primary_release_date.gte``.
        end_date: Upper bound for ``primary_release_date.lte``.
        token: TMDb bearer token. When falsy, no request is made.
        language: Locale for titles, overviews and genre names.

    Returns:
        One dict per unique movie id, in the order TMDb returned them, with
        keys ``tmdb_id``, ``title``, ``original_title``, ``overview``,
        ``release_date`` (a ``date``), ``genres``, ``genres_text``,
        ``poster_url``, ``popularity``, ``vote_average``, ``vote_count`` and
        ``raw`` (the unmodified TMDb entry). Entries whose ``release_date``
        field is missing or unparseable are skipped. Returns ``[]`` when
        *token* is falsy.

    Errors raised by the underlying HTTP/JSON calls propagate unchanged.
    """
    if not token:
        return []
    genre_map = fetch_genre_map(token, language=language)
    results: list[dict] = []
    seen_ids = set()
    page = 1
    # Stop at TMDb's page ceiling: asking for page 501 would raise and throw
    # away everything collected so far.
    while page <= _TMDB_MAX_PAGE:
        params = {
            "include_adult": "false",
            "include_video": "false",
            "language": language,
            "sort_by": "primary_release_date.desc",
            "primary_release_date.gte": start_date.isoformat(),
            "primary_release_date.lte": end_date.isoformat(),
            "with_keywords": TMDB_ANIME_KEYWORD_ID,
            "page": str(page),
        }
        payload = tmdb_get_json("/discover/movie", params, token)
        # ``or []`` also covers an explicit ``"results": null``.
        page_results = payload.get("results") or []
        if not page_results:
            break
        for movie in page_results:
            movie_id = movie.get("id")
            if movie_id in seen_ids:
                continue
            release_date = parse_release_date(str(movie.get("release_date") or ""))
            if not release_date:
                continue
            # Mark as seen only once the entry is accepted.
            seen_ids.add(movie_id)
            results.append(_build_movie_record(movie, release_date, genre_map))
        # If ``total_pages`` is missing, treat the current page as the last.
        total_pages = int(payload.get("total_pages") or page)
        if page >= total_pages:
            break
        page += 1
    return results