# Media_Scripts/rename.py

import os
import re
import io
import sys
import requests
import xml.etree.ElementTree as ET
import argparse
import json
import time
from datetime import datetime, timedelta

# Default file used to persist previously used regex patterns (one per line).
# The path is relative, so it resolves against the current working directory.
HISTORY_FILE = "regex_history.txt"
# Directory that holds one "<aid>.json" cache file per AniDB anime id
# (also relative to the current working directory).
CACHE_DIR = "anidb_cache"
# Cached episode lists older than this are ignored and re-fetched.
CACHE_EXPIRY_SECONDS = 86400  # 24 hours in seconds

# ==============================================================================
# CORE LOGIC (GUI-agnostic)
# ==============================================================================

def _load_cached_episodes(cache_file):
    """
    Return the still-valid cached episode list from cache_file, or None.

    None is returned when the file is missing, unreadable, malformed, older
    than CACHE_EXPIRY_SECONDS, or contains no episodes, so the caller can
    simply fall back to a fresh download.
    """
    try:
        with open(cache_file, 'r', encoding='utf-8') as f:
            cache_data = json.load(f)

        if cache_data.get('timestamp', 0) <= time.time() - CACHE_EXPIRY_SECONDS:
            return None

        # JSON has no tuple type, so rows come back as lists; rebuild the
        # (epno, title) tuples that the fresh-download path returns.
        episodes = [(int(epno), title) for epno, title in cache_data.get('episodes', [])]
        episodes.sort(key=lambda x: x[0])
    except (OSError, ValueError, TypeError, AttributeError):
        # OSError: missing/unreadable file. ValueError: invalid JSON or text
        # encoding. TypeError/AttributeError: JSON of an unexpected shape.
        return None

    return episodes or None


def _select_episode_title(episode, epno):
    """
    Pick the display title for one <episode> element.

    Preference order: English, romanized Japanese (x-jat), Japanese, then the
    first title with non-blank text, and finally a generic "Episode N".
    """
    title_elements = episode.findall('title')

    # Index titles by their xml:lang attribute.
    titles_by_lang = {}
    for title_elem in title_elements:
        lang = title_elem.get('{http://www.w3.org/XML/1998/namespace}lang')
        if lang and title_elem.text:
            titles_by_lang[lang] = title_elem.text

    for lang in ('en', 'x-jat', 'ja'):
        if lang in titles_by_lang:
            return titles_by_lang[lang]

    for title_elem in title_elements:
        if title_elem.text and title_elem.text.strip():
            return title_elem.text

    return f"Episode {epno}"


def fetch_anidb_data_core(folder_path):
    """
    Fetch episode data from AniDB based on folder name pattern {anidbN-X}.

    A per-anime JSON cache under CACHE_DIR is consulted first; the AniDB HTTP
    API is only contacted when no fresh cache entry exists. Only regular
    episodes (epno type "1") are returned.

    Args:
        folder_path: Path of the series folder; a trailing path separator is
            tolerated.

    Returns:
        (episodes_list, error_message). On success episodes_list is a list of
        (epno, title) tuples sorted by episode number and error_message is
        None. On failure episodes_list is None and error_message is a
        human-readable description.
    """
    # normpath drops a trailing separator; without it basename() of
    # "/x/Show {anidb4-1}/" is '' and a valid folder would be rejected.
    folder_name = os.path.basename(os.path.normpath(folder_path))

    # Extract anidb ID using regex (handles patterns like {anidb4-18874})
    match = re.search(r'\{anidb\d+-(\d+)\}', folder_name)
    if not match:
        return None, "Could not find valid AniDB ID in folder name.\nFolder name should contain pattern like: {anidb4-18874}"

    aid = match.group(1)

    cache_file = os.path.join(CACHE_DIR, f"{aid}.json")
    cached_episodes = _load_cached_episodes(cache_file)
    if cached_episodes:
        return cached_episodes, None

    # Fetch XML from AniDB API
    url = f"http://api.anidb.net:9001/httpapi?request=anime&client=testdesktop&clientver=1&protover=1&aid={aid}"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return None, f"Failed to fetch AniDB data:\n{str(e)}"

    # Parse XML and extract episodes
    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as e:
        return None, f"Failed to parse AniDB response:\n{str(e)}"

    # NOTE(review): per the AniDB HTTP API documentation, failures (ban,
    # unknown aid, bad client) come back as an <error> root element. Surface
    # that text instead of the misleading "no episodes" message.
    if root.tag == 'error':
        reason = (root.text or '').strip() or 'unknown error'
        return None, f"AniDB API returned an error:\n{reason}"

    # Extract regular episodes (type="1") with their numbers and titles
    episodes = []
    for episode in root.findall('.//episode'):
        epno_elem = episode.find('epno')
        if epno_elem is None or epno_elem.get('type') != '1':
            continue
        try:
            epno = int(epno_elem.text)
        except (ValueError, TypeError):
            # Missing or non-numeric episode number: skip the entry.
            continue
        episodes.append((epno, _select_episode_title(episode, epno)))

    if not episodes:
        return None, "No regular episodes were found in the AniDB response."

    # Sort episodes by number
    episodes.sort(key=lambda x: x[0])

    # Save to cache
    try:
        os.makedirs(CACHE_DIR, exist_ok=True)

        cache_info = {
            'aid': aid,
            'timestamp': time.time(),
            'episodes': episodes
        }

        with open(cache_file, 'w', encoding='utf-8') as f:
            json.dump(cache_info, f, indent=2)
    except OSError:
        # Cache save failure is not critical, continue with episodes
        pass

    return episodes, None

def format_titles_from_episodes(episodes):
    """Render (epno, title) pairs as newline-separated "epno<TAB>title" rows."""
    rows = []
    for number, name in episodes:
        rows.append(f"{number}\t{name}")
    return "\n".join(rows)

def format_titles(titles_data):
    """
    Parse tab-separated "epno<TAB>title" lines into a dictionary.

    Lines without a tab, or whose first column is not an integer (headers,
    stray notes pasted into the text box), are skipped rather than raising.
    Backticks in titles are replaced with apostrophes. When an episode number
    appears more than once, the last occurrence wins.

    Returns:
        dict mapping int episode number -> title string.
    """
    titles = {}
    for line in titles_data.strip().split('\n'):
        # Tolerate CRLF input so titles do not end with a stray '\r'.
        parts = line.rstrip('\r').split('\t')
        if len(parts) < 2:
            continue
        try:
            episode_number = int(parts[0])
        except ValueError:
            continue
        titles[episode_number] = parts[1].replace("`", "'")
    return titles

def is_valid_windows_filename(filename):
    """
    Check if filename is valid for Windows.

    Rejects names that contain a reserved character (< > : " / \\ | ? *) or
    an ASCII control character, that end with a space or a dot, or whose
    part before the first dot is a reserved device name (CON, PRN, AUX, NUL,
    COM1-COM9, LPT1-LPT9), compared case-insensitively.

    Returns:
        True for a valid name. An empty value is also reported as valid
        because callers use it to mean "no override".
    """
    if not filename:
        return True  # Empty is valid (means no override)
    # Control characters 0x00-0x1F are as illegal as the punctuation set.
    if re.search(r'[<>:"/\\|?*\x00-\x1f]', filename):
        return False
    if filename.endswith((' ', '.')):
        return False
    reserved = {'CON', 'PRN', 'AUX', 'NUL'} | {f'COM{i}' for i in range(1, 10)} | {f'LPT{i}' for i in range(1, 10)}
    # Device names stay reserved when followed by an extension ("NUL.txt").
    stem = filename.split('.', 1)[0].upper()
    return stem not in reserved

def rename_files_core(folder_path, dry_run, regex_pattern, episode_offset, override_title, episode_group, episode_titles_dic):
    """
    Core renaming logic.

    Every regular file in folder_path whose name matches regex_pattern (from
    the start of the name) is renamed to
    "<series> - EP<NN> - <episode title><ext>".

    Args:
        folder_path: Directory to process; a non-directory yields [].
        dry_run: When true, nothing is renamed; results show what would happen.
        regex_pattern: Pattern applied with match(); group 1 is the series
            title unless override_title is given.
        episode_offset: Integer added to the captured episode number.
        override_title: Series title to use instead of group 1 (falsy = none).
        episode_group: Index of the capture group holding the episode number.
        episode_titles_dic: Mapping of episode number -> title; missing
            numbers become "Unknown Title".

    Returns:
        List of tuples [(old_filename, new_filename_or_None, status), ...] in
        sorted filename order. Successful (or simulated) renames have status
        "renamed"; failures have new_filename None and a status starting with
        "Error".

    Raises:
        re.error: If regex_pattern is not a valid regular expression.
    """
    results = []
    if not os.path.isdir(folder_path):
        return results

    pattern = re.compile(regex_pattern)
    # Normalised target names already handed out in this run, so two source
    # files can never be mapped onto the same destination.
    claimed_targets = set()

    # Sorted for a deterministic processing/report order.
    for filename in sorted(os.listdir(folder_path)):
        full_path = os.path.join(folder_path, filename)
        if not os.path.isfile(full_path):
            continue

        match = pattern.match(filename)
        if not match:
            continue

        try:
            series_title = override_title if override_title else match.group(1)
            # TypeError covers an optional group that did not participate.
            episode_number = int(match.group(episode_group)) + episode_offset
        except (IndexError, ValueError, TypeError) as e:
            results.append((filename, None, f"Error: {str(e)}"))
            continue

        episode_title = episode_titles_dic.get(episode_number, "Unknown Title")
        episode_title = re.sub(r'[\\/:*?"<>|]', '_', episode_title)
        # Keep the file's own extension; fall back to .mkv when it has none.
        extension = os.path.splitext(filename)[1] or ".mkv"
        new_filename = f"{series_title} - EP{episode_number:02d} - {episode_title}{extension}"
        new_full_path = os.path.join(folder_path, new_filename)

        target_key = os.path.normcase(new_filename)
        if target_key in claimed_targets:
            results.append((filename, None, f"Error: target name already used in this run: {new_filename}"))
            continue

        try:
            # os.rename silently replaces an existing file on POSIX, so refuse
            # to touch a target that exists and is not the source itself.
            if os.path.exists(new_full_path) and not os.path.samefile(full_path, new_full_path):
                results.append((filename, None, f"Error: target already exists: {new_filename}"))
                continue
            if not dry_run:
                os.rename(full_path, new_full_path)
        except OSError as e:
            results.append((filename, None, f"Error: {str(e)}"))
            continue

        claimed_targets.add(target_key)
        results.append((filename, new_filename, "renamed"))
    return results

def save_regex_to_history(pattern, history_file=HISTORY_FILE):
    """
    Save regex pattern to history file.

    The pattern is inserted at the front of the history unless it is already
    present, and the file is rewritten with at most 20 entries. Saving is
    best-effort: I/O problems are ignored and never raise.

    Args:
        pattern: Regex text to remember; a falsy value is ignored.
        history_file: Path of the history file (one pattern per line).
    """
    if not pattern:
        return

    history = []
    try:
        with open(history_file, 'r', encoding='utf-8') as f:
            history = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        # First use: start with an empty history.
        pass
    except (OSError, UnicodeDecodeError):
        # The file exists but cannot be read; leave it alone rather than
        # overwrite content we could not load.
        return

    if pattern not in history:
        history.insert(0, pattern)
        try:
            with open(history_file, 'w', encoding='utf-8') as f:
                f.write('\n'.join(history[:20]))
        except OSError:
            pass

def load_regex_history(history_file=HISTORY_FILE):
    """
    Load regex history from file.

    Returns:
        List of non-empty, whitespace-stripped lines in file order, or an
        empty list when the file is missing or cannot be read or decoded.
    """
    # Open directly instead of checking existence first: this avoids the
    # check-then-open race and also covers unreadable or non-UTF-8 files.
    try:
        with open(history_file, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f if line.strip()]
    except (OSError, UnicodeDecodeError):
        return []

def list_regex_history_cli(history_file):
    """
    Print the saved regex patterns with their indices for CLI use.

    Always returns 0, which the caller uses as the process exit status.
    """
    patterns = load_regex_history(history_file)
    if not patterns:
        print("No regex history found.")
        return 0

    builtin_default = r'\[SubsPlease\] (.+?) - (\d{2}) \(1080p\) \[\w+\]\.mkv'
    rule = "-" * 60

    print("Saved regex patterns (use --regex-index N to select):")
    print(rule)
    for index, entry in enumerate(patterns):
        # Patterns longer than 60 characters are cut to 57 plus an ellipsis.
        if len(entry) > 60:
            shown = entry[:57] + "..."
        else:
            shown = entry
        # Only the first entry is tagged, and only if it is the built-in pattern.
        if index == 0 and entry == builtin_default:
            suffix = " (default)"
        else:
            suffix = ""
        print(f"  [{index}] {shown}{suffix}")
    print(rule)
    print(f"Total: {len(patterns)} patterns")
    return 0

# ==============================================================================
# GUI CLASS (LAZY IMPORT)
# ==============================================================================

class RenamerGUI:
    """
    Tkinter front end for the renamer.

    The window has a "Main" tab (folder, regex, episode titles, log) and an
    "Options" tab (episode offset, title override, episode capture group).
    All real work is delegated to the module-level core functions.
    """

    def __init__(self, history_file=HISTORY_FILE):
        """Create the main window; history_file backs the regex combobox."""
        # Import tkinter only when GUI is actually instantiated
        import tkinter as tk
        from tkinter import filedialog, scrolledtext, ttk, messagebox

        # Keep the lazily imported modules on the instance for the other methods.
        self.tk = tk
        self.filedialog = filedialog
        self.scrolledtext = scrolledtext
        self.ttk = ttk
        self.messagebox = messagebox

        self.history_file = history_file
        self.root = tk.Tk()
        self.root.title("File Renamer")
        self.root.geometry("850x650")

        self.style = ttk.Style()
        # Style applied to the override entry while its content is invalid.
        self.style.configure("Invalid.TEntry", fieldbackground="pink")

        self._build_ui()

    def _build_ui(self):
        """Create and lay out all widgets of both tabs."""
        tk = self.tk
        ttk = self.ttk

        notebook = ttk.Notebook(self.root)
        notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        main_frame = ttk.Frame(notebook, padding="10")
        options_frame = ttk.Frame(notebook, padding="10")
        notebook.add(main_frame, text="Main")
        notebook.add(options_frame, text="Options")

        # Main Tab
        ttk.Label(main_frame, text="Folder Path:").grid(column=0, row=0, sticky=tk.W, pady=2)
        self.folder_entry = ttk.Entry(main_frame)
        self.folder_entry.grid(column=1, row=0, sticky="ew", padx=5, pady=2)
        ttk.Button(main_frame, text="Browse", command=self.select_folder).grid(column=2, row=0, sticky=tk.W, padx=5, pady=2)
        ttk.Button(main_frame, text="Fetch from AniDB", command=self.fetch_anidb_data).grid(column=3, row=0, sticky=tk.W, padx=5, pady=2)

        ttk.Label(main_frame, text="Regex Pattern:").grid(column=0, row=1, sticky=tk.W, pady=2)
        self.regex_entry = ttk.Combobox(main_frame, values=load_regex_history(self.history_file))
        self.regex_entry.grid(column=1, row=1, columnspan=3, sticky="ew", padx=5, pady=2)
        self.regex_entry.set(r'\[SubsPlease\] (.+?) - (\d{2}) \(1080p\) \[\w+\]\.mkv')

        ttk.Label(main_frame, text="Episode Titles:").grid(column=0, row=2, sticky=tk.W, pady=2)
        self.titles_text = self.scrolledtext.ScrolledText(main_frame, height=8)
        self.titles_text.grid(column=0, row=3, columnspan=4, sticky="nsew", padx=5, pady=2)

        # Dry run is the default so nothing is renamed by accident.
        self.dry_run_var = tk.BooleanVar(value=True)
        ttk.Checkbutton(main_frame, text="Dry Run", variable=self.dry_run_var).grid(column=0, row=4, sticky=tk.W, pady=2)
        ttk.Button(main_frame, text="Rename Files", command=self.start_renaming).grid(column=3, row=4, sticky=tk.E, pady=2)

        ttk.Label(main_frame, text="Log:").grid(column=0, row=5, sticky=tk.W, pady=2)
        self.log_text = self.scrolledtext.ScrolledText(main_frame)
        self.log_text.grid(column=0, row=6, columnspan=4, sticky="nsew", padx=5, pady=2)

        # Options Tab
        ttk.Label(options_frame, text="Episode Number Offset:").grid(column=0, row=0, sticky=tk.W, pady=2)
        self.offset_entry = ttk.Entry(options_frame, width=10)
        self.offset_entry.grid(column=1, row=0, sticky=tk.W, padx=5, pady=2)
        self.offset_entry.insert(0, "0")

        self.override_label = ttk.Label(options_frame, text="Override Show Title:")
        self.override_label.grid(column=0, row=1, sticky=tk.W, pady=2)
        self.override_entry = ttk.Entry(options_frame)
        self.override_entry.grid(column=1, row=1, sticky="ew", padx=5, pady=2)
        # Re-validate the override title after every keystroke.
        self.override_entry.bind("<KeyRelease>", self.validate_override_title)

        ttk.Label(options_frame, text="Episode Number Regex Group:").grid(column=0, row=2, sticky=tk.W, pady=2)
        self.episode_group_entry = ttk.Entry(options_frame, width=10)
        self.episode_group_entry.grid(column=1, row=2, sticky=tk.W, padx=5, pady=2)
        self.episode_group_entry.insert(0, "2")

        # Expandable configuration: only column 1 stretches horizontally; the
        # titles box (row 3) and the log (row 6) share extra vertical space 1:2.
        for i in range(4):
            main_frame.columnconfigure(i, weight=1 if i == 1 else 0)
        main_frame.rowconfigure(3, weight=1)
        main_frame.rowconfigure(6, weight=2)
        options_frame.columnconfigure(1, weight=1)

        instructions = ttk.Label(main_frame, text="Note: Folder name must contain pattern {anidb4-12345} to fetch from AniDB",
                                 font=("Arial", 8), foreground="gray")
        instructions.grid(column=0, row=7, columnspan=4, sticky=tk.W, pady=(5,0))

    def validate_override_title(self, event=None):
        """Highlight the override entry and its label while the title is invalid."""
        title = self.override_entry.get()
        valid = is_valid_windows_filename(title)
        self.override_entry.config(style="TEntry" if valid else "Invalid.TEntry")
        self.override_label.config(
            text="Override Show Title:" if valid else "Invalid Windows filename!",
            foreground="black" if valid else "red"
        )

    def select_folder(self):
        """Let the user pick a folder and put it into the folder entry."""
        folder_path = self.filedialog.askdirectory()
        if folder_path:
            self.folder_entry.delete(0, self.tk.END)
            self.folder_entry.insert(0, folder_path)

    def fetch_anidb_data(self):
        """Fetch AniDB data and populate the titles text area."""
        folder_path = self.folder_entry.get()
        if not folder_path:
            self.messagebox.showerror("Error", "Please select a folder first.")
            return

        episodes, error = fetch_anidb_data_core(folder_path)
        if error:
            self.messagebox.showerror("Error", error)
            return

        formatted_titles = format_titles_from_episodes(episodes)
        self.titles_text.delete("1.0", self.tk.END)
        self.titles_text.insert(self.tk.END, formatted_titles)
        self.log_message(f"Successfully loaded {len(episodes)} episodes from AniDB")

    def log_message(self, message):
        """Append one line to the log box and scroll it into view."""
        self.log_text.insert(self.tk.END, message + "\n")
        self.log_text.see(self.tk.END)

    def start_renaming(self):
        """
        Validate the form, run the rename (or dry run) and show the results.

        Every validation or runtime problem is reported in an error dialog;
        the method does not raise for bad user input.
        """
        folder_path = self.folder_entry.get()
        dry_run = self.dry_run_var.get()
        regex_pattern = self.regex_entry.get()

        try:
            episode_offset = int(self.offset_entry.get())
            episode_group = int(self.episode_group_entry.get())
        except ValueError:
            self.messagebox.showerror("Error", "Offset and Episode Group must be integers.")
            return

        override_title = self.override_entry.get()

        if not folder_path:
            self.messagebox.showerror("Error", "Please select a folder.")
            return

        if override_title and not is_valid_windows_filename(override_title):
            self.messagebox.showerror("Error", "The override title is not a valid Windows filename.")
            return

        # Reject a broken pattern before it is stored in the history or used.
        try:
            re.compile(regex_pattern)
        except re.error as e:
            self.messagebox.showerror("Error", f"Invalid regex pattern:\n{e}")
            return

        save_regex_to_history(regex_pattern, self.history_file)
        self.regex_entry['values'] = load_regex_history(self.history_file)

        # Get titles from text widget
        titles_data = self.titles_text.get("1.0", self.tk.END)
        try:
            episode_titles_dic = format_titles(titles_data)
        except ValueError as e:
            self.messagebox.showerror("Error", f"Could not parse episode titles:\n{e}")
            return

        # Redirect stdout to capture print statements from core function.
        # The finally clause guarantees stdout is restored even if the core
        # function raises; otherwise all later output would vanish.
        old_stdout = sys.stdout
        captured = io.StringIO()
        sys.stdout = captured
        try:
            results = rename_files_core(
                folder_path, dry_run, regex_pattern, episode_offset,
                override_title, episode_group, episode_titles_dic
            )
        except OSError as e:
            self.messagebox.showerror("Error", f"Renaming failed:\n{e}")
            return
        finally:
            sys.stdout = old_stdout

        output = captured.getvalue()

        # Display results
        self.log_text.delete("1.0", self.tk.END)
        self.log_text.insert(self.tk.END, output)

        for old_name, new_name, status in results:
            if new_name:
                self.log_message(f"{'[DRY-RUN] Would rename' if dry_run else 'Renamed'}: '{old_name}' -> '{new_name}'")
            elif status.startswith("Error"):
                self.log_message(f"[ERROR] {old_name}: {status}")

        if not results:
            self.log_message("No files matched the pattern.")

    def run(self):
        """Enter the Tk main loop; returns when the window is closed."""
        self.root.mainloop()

# ==============================================================================
# CLI INTERFACE
# ==============================================================================

def run_cli():
    """
    Command-line entry point.

    Parses sys.argv, fetches episode titles from AniDB for the input folder
    and renames the matching files (dry-run unless --execute is given). With
    --list-regex it only prints the saved patterns. The pattern that was used
    is saved to the history file.

    Exits via sys.exit / parser.error with a non-zero status on invalid
    arguments, an invalid regex, a missing folder or an AniDB failure.
    """
    parser = argparse.ArgumentParser(
        description='Rename anime files based on AniDB data. CLI mode automatically fetches titles from AniDB.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # GUI Mode
  python %(prog)s

  # List saved regex patterns
  python %(prog)s -l

  # Use regex from history index 2
  python %(prog)s -i /path/to/folder -x 2

  # Dry-run with specific regex
  python %(prog)s -i /path/to/folder -r "pattern"

  # Actually rename files (execute mode)
  python %(prog)s -i /path/to/folder --execute
        """
    )

    # Input/Action arguments
    parser.add_argument('-i', '--input', metavar='PATH',
                       help='Input folder path (must contain {anidbN-XXXX} in folder name)')
    parser.add_argument('-e', '--execute', action='store_true',
                       help='Actually rename files (default is dry-run)')
    parser.add_argument('-l', '--list-regex', action='store_true',
                       help='List saved regex patterns with indices and exit')

    # Regex selection: -r and -x are mutually exclusive (enforced by argparse)
    regex_group = parser.add_mutually_exclusive_group()
    regex_group.add_argument('-r', '--regex',
                           default=r'\[SubsPlease\] (.+?) - (\d{2}) \(1080p\) \[\w+\]\.mkv',
                           help='Regex pattern with capture groups (default: SubsPlease pattern)')
    regex_group.add_argument('-x', '--regex-index', type=int, metavar='N',
                           help='Use regex from history at index N (see --list-regex)')

    # Other options
    parser.add_argument('-o', '--offset', type=int, default=0,
                       help='Episode number offset (default: 0)')
    parser.add_argument('-t', '--override-title', default='',
                       help='Override series title (optional)')
    parser.add_argument('-g', '--episode-group', type=int, default=2,
                       help='Regex group index for episode number (default: 2)')
    parser.add_argument('--history-file', default='regex_history.txt',
                       help='Path to regex history file')
    parser.add_argument('--no-color', action='store_true',
                       help='Disable colored output')

    args = parser.parse_args()

    # Handle list-regex first (doesn't require -i)
    if args.list_regex:
        sys.exit(list_regex_history_cli(args.history_file))

    # Validate that -i is provided for actual operations
    if not args.input:
        parser.error("the following arguments are required: -i/--input (unless using --list-regex)")

    # Colors for terminal output (ANSI escapes are disabled on Windows)
    if args.no_color or os.name == 'nt':
        RED = GREEN = YELLOW = CYAN = RESET = ''
    else:
        RED = '\033[91m'
        GREEN = '\033[92m'
        YELLOW = '\033[93m'
        CYAN = '\033[96m'
        RESET = '\033[0m'

    # Determine which regex to use
    regex_pattern = args.regex
    if args.regex_index is not None:
        history = load_regex_history(args.history_file)
        if not history:
            print(f"{RED}Error: History file is empty or not found: {args.history_file}{RESET}", file=sys.stderr)
            sys.exit(1)
        if args.regex_index < 0 or args.regex_index >= len(history):
            print(f"{RED}Error: Regex index {args.regex_index} out of range (0-{len(history)-1}){RESET}", file=sys.stderr)
            sys.exit(1)
        regex_pattern = history[args.regex_index]
        print(f"{CYAN}Using regex from history [{args.regex_index}]:{RESET} {regex_pattern}")
    else:
        print(f"{CYAN}Using regex:{RESET} {regex_pattern}")

    # Fail early with a readable message instead of a traceback mid-run.
    try:
        re.compile(regex_pattern)
    except re.error as e:
        print(f"{RED}Error: Invalid regex pattern: {e}{RESET}", file=sys.stderr)
        sys.exit(1)

    # Validate inputs
    if not os.path.isdir(args.input):
        print(f"{RED}Error: Folder not found: {args.input}{RESET}", file=sys.stderr)
        sys.exit(1)

    # Shell completion usually appends a path separator; normalise it away so
    # basename() yields the folder name instead of an empty string.
    input_path = os.path.normpath(args.input)

    # Check for AniDB pattern in folder name
    folder_name = os.path.basename(input_path)
    if not re.search(r'\{anidb\d+-\d+\}', folder_name):
        print(f"{RED}Error: Folder name must contain AniDB pattern like {{anidb4-12345}}{RESET}", file=sys.stderr)
        sys.exit(1)

    if args.override_title and not is_valid_windows_filename(args.override_title):
        print(f"{RED}Error: Override title contains invalid characters for Windows filenames{RESET}", file=sys.stderr)
        sys.exit(1)

    # Step 1: Fetch AniDB data (Required in CLI mode)
    print(f"{YELLOW}Fetching AniDB data for:{RESET} {folder_name}")
    episodes, error = fetch_anidb_data_core(input_path)

    if error:
        print(f"{RED}Error fetching AniDB data: {error}{RESET}", file=sys.stderr)
        sys.exit(1)

    print(f"{GREEN}Successfully fetched {len(episodes)} episodes from AniDB{RESET}")

    # Convert to dictionary for renaming (same text round trip as the GUI,
    # so both modes normalise titles identically)
    titles_data = format_titles_from_episodes(episodes)
    episode_titles_dic = format_titles(titles_data)

    # Display fetched titles (compact view)
    print(f"\n{YELLOW}Episode mapping:{RESET}")
    for epno, title in episodes[:5]:  # Show first 5
        print(f"  EP{epno:02d}: {title}")
    if len(episodes) > 5:
        print(f"  ... and {len(episodes) - 5} more")

    # Step 2: Execute rename
    dry_run = not args.execute

    print()
    if dry_run:
        print(f"{YELLOW}=== DRY-RUN MODE (use --execute to actually rename) ==={RESET}")
    else:
        print(f"{YELLOW}=== EXECUTING RENAME ==={RESET}")

    results = rename_files_core(
        input_path,
        dry_run,
        regex_pattern,
        args.offset,
        args.override_title,
        args.episode_group,
        episode_titles_dic
    )

    print()
    renamed_count = 0
    error_count = 0
    for old_name, new_name, status in results:
        if new_name:
            action = f"{YELLOW}[DRY-RUN]{RESET}" if dry_run else f"{GREEN}[OK]{RESET}"
            print(f"{action} {old_name} -> {new_name}")
            renamed_count += 1
        elif status.startswith("Error"):
            print(f"{RED}[ERROR]{RESET} {old_name}: {status}")
            error_count += 1

    # results only contains files that matched the pattern, so an empty list
    # means "nothing matched"; a non-empty list without any new name means
    # every matched file failed.
    if not results:
        print(f"{YELLOW}Warning: No files matched the regex pattern{RESET}")
    elif renamed_count == 0:
        print(f"{YELLOW}Warning: None of the matched files could be renamed{RESET}")

    # Save regex to history (the one we actually used)
    save_regex_to_history(regex_pattern, args.history_file)

    print()
    if dry_run and renamed_count > 0:
        print(f"{YELLOW}Dry-run complete. {renamed_count} files would be renamed.{RESET}")
        print(f"Run with {CYAN}--execute{RESET} to confirm.")
    elif not dry_run:
        print(f"{GREEN}Success: {renamed_count} files renamed{RESET}")
    if error_count:
        print(f"{RED}{error_count} files could not be processed (see errors above){RESET}")

# ==============================================================================
# MAIN ENTRY POINT
# ==============================================================================

if __name__ == "__main__":
    # Any command-line argument selects CLI mode; a bare invocation opens the
    # GUI, which is the only path that imports tkinter.
    if sys.argv[1:]:
        run_cli()
    else:
        RenamerGUI().run()