Anime-Movies-Upcomming-Real…/data_sources/animeschedule_source.py

#!/usr/bin/env python3
from __future__ import annotations

import json
import urllib.parse
import urllib.request

# Root URL of the AnimeSchedule v3 API; endpoint paths such as "/anime" are
# appended to it when building request URLs.
ANIMESCHEDULE_API_BASE = "https://animeschedule.net/api/v3"


def normalize_title(text: str) -> str:
    """Return a comparison key for a title.

    Alphanumeric characters are lower-cased, every other character is
    treated as a separator, and runs of separators collapse to a single
    space with no leading or trailing space. A falsy *text* yields "".
    """
    if not text:
        return ""

    pieces = []
    for char in text:
        pieces.append(char.lower() if char.isalnum() else " ")

    # split() with no argument drops empty fields, which squeezes repeated
    # separators and trims both ends in one step.
    words = "".join(pieces).split()
    return " ".join(words)


def flatten_items(payload: dict | list) -> list[dict]:
    """Return the dict entries of a decoded API payload.

    A list payload is used as-is. For a dict payload, the first of the keys
    "anime", "items", "data", "results" whose value is a list supplies the
    entries. Anything else produces an empty list. Non-dict entries are
    dropped in every case.
    """
    if isinstance(payload, list):
        entries = payload
    elif isinstance(payload, dict):
        entries = []
        for field in ("anime", "items", "data", "results"):
            maybe_entries = payload.get(field)
            if isinstance(maybe_entries, list):
                # The first list-valued key wins, even when that list is empty.
                entries = maybe_entries
                break
    else:
        return []

    return [entry for entry in entries if isinstance(entry, dict)]


def extract_names(item: dict) -> list[str]:
    """Collect every distinct title string carried by an API item.

    The top-level fields "title", "name", "romaji", "english" and "native"
    are read first, followed by every value of the "names" mapping (list
    values are expanded element by element). Each name is stripped, empty
    names are dropped, and duplicates are removed case-insensitively while
    keeping the first spelling and the original order.

    Args:
        item: One decoded API record.

    Returns:
        The unique, non-empty names in discovery order.
    """
    found = []

    for field in ("title", "name", "romaji", "english", "native"):
        value = item.get(field)
        # Only real strings are usable here: calling .strip() on a number or
        # a nested object would raise AttributeError, so such values are
        # skipped instead of aborting the whole extraction.
        if not isinstance(value, str):
            continue
        text = value.strip()
        if text:
            found.append(text)

    nested = item.get("names") or {}
    if isinstance(nested, dict):
        for value in nested.values():
            # Treat a scalar as a one-element list so both shapes share the
            # same stringify-and-strip handling.
            members = value if isinstance(value, list) else [value]
            for member in members:
                text = "" if member is None else str(member).strip()
                if text:
                    found.append(text)

    # dict preserves insertion order, so setdefault keeps the first spelling
    # seen for each lower-cased key.
    unique = {}
    for name in found:
        unique.setdefault(name.lower(), name)
    return list(unique.values())


def extract_url(item: dict) -> str:
    """Return a page URL for an API item, or "" when none can be derived.

    The first usable value among "slug", "route" and "id" is appended to
    the animeschedule.net anime path. If none of them is usable, the first
    non-empty string "url" found in the "websites" (or "links") list is
    returned instead.

    Integer identifiers are stringified; other non-string values and
    whitespace-only strings are skipped rather than raising on .strip().

    Args:
        item: One decoded API record.

    Returns:
        The URL, or an empty string.
    """
    for field in ("slug", "route", "id"):
        value = item.get(field)
        # bool is a subclass of int; True/False are never valid identifiers.
        if not value or isinstance(value, bool):
            continue
        if isinstance(value, int):
            # A numeric id is a valid path segment once stringified.
            value = str(value)
        if isinstance(value, str):
            slug = value.strip()
            if slug:
                return f"https://animeschedule.net/anime/{slug}"

    websites = item.get("websites") or item.get("links") or []
    if isinstance(websites, list):
        for entry in websites:
            if not isinstance(entry, dict):
                continue
            url = entry.get("url")
            if isinstance(url, str) and url.strip():
                return url.strip()
    return ""


def extract_english_title(item: dict) -> str:
    """Return the English title of an API item, or "" when none is present.

    The top-level fields "english", "titleEnglish" and "englishTitle" are
    tried first. Otherwise the "names" mapping is scanned for a key that,
    stripped and lower-cased, is one of "en", "eng", "english" or
    "titleenglish".

    Non-string top-level values are skipped instead of raising on .strip().
    When a matching "names" value is a list, its first non-empty element is
    returned (the same element-wise handling extract_names applies) rather
    than the list's Python repr.

    Args:
        item: One decoded API record.

    Returns:
        The stripped English title, or an empty string.
    """
    for field in ("english", "titleEnglish", "englishTitle"):
        value = item.get(field)
        if isinstance(value, str):
            text = value.strip()
            if text:
                return text

    names = item.get("names") or {}
    if isinstance(names, dict):
        english_keys = {"en", "eng", "english", "titleenglish"}
        for key, value in names.items():
            if str(key).strip().lower() not in english_keys:
                continue
            # Treat a scalar as a one-element list so both shapes share the
            # same stringify-and-strip handling.
            members = value if isinstance(value, list) else [value]
            for member in members:
                text = "" if member is None else str(member).strip()
                if text:
                    return text
    return ""


def _entry_label(entry: object) -> str:
    """Return the display text of one list entry, or "" if it has none."""
    if isinstance(entry, str):
        return entry.strip()
    if isinstance(entry, dict):
        for field in ("name", "title"):
            label = entry.get(field)
            # Ignore non-string labels instead of crashing on .strip().
            if isinstance(label, str) and label.strip():
                return label.strip()
    return ""


def extract_list(item: dict, *keys: str) -> str:
    """Return the first non-empty value among *keys* as display text.

    Keys are tried in the given order. A string value is returned stripped.
    A list value is rendered as its entries joined with ", ", where each
    entry is either a string or a dict carrying a "name" or "title" string;
    unusable entries are skipped. A key whose value yields no text is passed
    over in favour of the next key.

    Args:
        item: One decoded API record.
        *keys: Candidate field names, in priority order.

    Returns:
        The extracted text, or "" when no key produces any.
    """
    for key in keys:
        value = item.get(key)
        if isinstance(value, str):
            text = value.strip()
            if text:
                return text
        elif isinstance(value, list):
            labels = [label for label in map(_entry_label, value) if label]
            if labels:
                return ", ".join(labels)
    return ""


def extract_format(item: dict) -> str:
    """Return the item's media format as text, or "" when it has none.

    The fields "format", "mediaType" and "type" are consulted in that order;
    the first one whose stringified, stripped value is non-empty wins.
    """
    candidates = (item.get(field) for field in ("format", "mediaType", "type"))
    for raw in candidates:
        if raw is None:
            continue
        label = str(raw).strip()
        if label:
            return label
    return ""


def extract_description(item: dict) -> str:
    """Return the item's descriptive text, or "" when it has none.

    The fields "description", "synopsis", "overview" and "summary" are tried
    on the item itself first, then on its "details" mapping if that is a
    dict. Values are stringified and stripped; the first non-empty one wins.
    """
    fields = ("description", "synopsis", "overview", "summary")

    def first_text(source: dict) -> str:
        # First non-empty stringified field of *source*, in priority order.
        for field in fields:
            raw = source.get(field)
            if raw is None:
                continue
            text = str(raw).strip()
            if text:
                return text
        return ""

    top_level = first_text(item)
    if top_level:
        return top_level

    details = item.get("details") or {}
    if not isinstance(details, dict):
        return ""
    return first_text(details)


def empty_result() -> dict:
    """Build a fresh "no match" result.

    Every text field is "", "names" is a new empty list, "match_score" is 0
    and "raw" is a new empty dict. A new dict is created on each call.
    """
    text_fields = (
        "url",
        "title",
        "english_title",
        "format",
        "genres",
        "studios",
        "description",
    )
    blank = dict.fromkeys(text_fields, "")
    blank["names"] = []
    blank["match_score"] = 0
    blank["raw"] = {}
    return blank


def title_match_score(wanted: str, names: list[str]) -> int:
    """Grade how well any of *names* matches *wanted*, from 0 to 4.

    Both sides are compared after normalize_title(). The result is the best
    grade over all candidates:

        4  normalized titles are equal
        3  one normalized title is a substring of the other
        2  both have at least two words and their first two words agree
        1  they share at least one word
        0  nothing in common, or *wanted* normalizes to ""
    """
    target = normalize_title(wanted)
    if not target:
        return 0
    target_words = target.split(" ")

    def grade(candidate: str) -> int:
        # Grade a single candidate name against the target.
        probe = normalize_title(candidate)
        if not probe:
            return 0
        if probe == target:
            return 4
        if target in probe or probe in target:
            return 3

        probe_words = probe.split(" ")
        both_long_enough = len(probe_words) >= 2 and len(target_words) >= 2
        if both_long_enough and probe_words[:2] == target_words[:2]:
            return 2

        return 0 if set(probe_words).isdisjoint(target_words) else 1

    return max((grade(candidate) for candidate in names), default=0)


def fetch_animeschedule_anime_by_title(title: str, token: str, cache: dict[str, dict]) -> dict:
    """Look up *title* on AnimeSchedule and return the best-matching record.

    Results, including "no match" results, are memoized in *cache* under
    the normalized title, so each distinct title is fetched at most once per
    cache. The request goes to the "/anime" endpoint with the query
    parameters search=<title> and take=10, authenticated with a bearer
    token.

    Args:
        title: Title to search for.
        token: API bearer token. When empty no request is made.
        cache: Mutable mapping of normalized title -> result; updated in
            place. The returned dict is the cached object itself.

    Returns:
        A dict with the keys produced by empty_result(). On success it
        carries the extracted fields of the highest-scoring candidate plus
        the untouched record under "raw"; otherwise every field is empty and
        "match_score" is 0. Request or decoding failures never raise; they
        produce (and cache) an empty result.
    """
    key = normalize_title(title)
    if key in cache:
        return cache[key]

    def remember(result: dict) -> dict:
        # Store the outcome for this title and hand it back to the caller.
        cache[key] = result
        return result

    # Without a token the request cannot be authenticated. A title that
    # normalizes to "" can never match: title_match_score() returns 0 for
    # it, so the request would be wasted.
    if not token or not key:
        return remember(empty_result())

    query = urllib.parse.urlencode({"search": title, "take": "10"})
    req = urllib.request.Request(
        f"{ANIMESCHEDULE_API_BASE}/anime?{query}",
        headers={
            "Accept": "application/json",
            "Authorization": f"Bearer {token}",
            "User-Agent": "anime-movies-script/1.0",
        },
    )

    try:
        with urllib.request.urlopen(req, timeout=20) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except Exception:
        # Deliberately best-effort: any network, HTTP or decoding problem is
        # treated as "no data" so one failed lookup cannot abort the caller.
        return remember(empty_result())

    best = None
    best_names: list[str] = []
    best_score = 0  # only candidates scoring above 0 are eligible

    for item in flatten_items(payload):
        names = extract_names(item)
        if not names:
            continue

        score = title_match_score(title, names)
        if score > best_score:
            best, best_names, best_score = item, names, score
            if score == 4:
                # 4 is the exact-match grade; nothing can beat it.
                break

    if best is None:
        return remember(empty_result())

    return remember(
        {
            "url": extract_url(best),
            "title": best_names[0],
            "english_title": extract_english_title(best),
            "format": extract_format(best),
            "genres": extract_list(best, "genres", "genre", "categories"),
            "studios": extract_list(best, "studios", "studio"),
            "description": extract_description(best),
            "names": best_names,
            "match_score": best_score,
            "raw": best,
        }
    )