Files
Media_Scripts/rename.py
Imrayya 628875f676 feat(anidb_cache): add cache mechanism to fetch data efficiently
Added caching mechanism to store and retrieve AniDB API responses for efficient data fetching. This reduces redundant requests and improves performance by utilizing cached data when available. Cache files are stored in a directory named 'anidb_cache', and each entry is identified by the anime ID (aid). The cache validity period is set at 24 hours, ensuring that outdated information is refreshed regularly.
2026-02-16 16:49:11 +07:00

597 lines
24 KiB
Python

import os
import re
import io
import sys
import requests
import xml.etree.ElementTree as ET
import argparse
import json
import time
from datetime import datetime, timedelta
# Default file used to remember regex patterns; a relative path, so it is
# resolved against the current working directory.
HISTORY_FILE = "regex_history.txt"
# Directory holding one "<aid>.json" cache file per AniDB anime ID (also a
# relative path).
CACHE_DIR = "anidb_cache"
# Maximum age of a cache entry before it is considered stale and refetched.
CACHE_EXPIRY_SECONDS = 86400 # 24 hours in seconds
# ==============================================================================
# CORE LOGIC (GUI-agnostic)
# ==============================================================================
def _load_cached_episodes(cache_file):
    """Return the unexpired episode list stored in *cache_file*, or None.

    None is returned when the file is missing, unreadable, malformed, older
    than CACHE_EXPIRY_SECONDS, or holds no episodes, so the caller can simply
    fall back to a fresh download.
    """
    try:
        with open(cache_file, 'r', encoding='utf-8') as f:
            cache_data = json.load(f)
        if cache_data['timestamp'] <= time.time() - CACHE_EXPIRY_SECONDS:
            return None
        episodes = []
        for epno, title in cache_data['episodes']:
            if not isinstance(title, str):
                return None
            # JSON turns tuples into lists; rebuild the documented tuple shape.
            episodes.append((int(epno), title))
    except (OSError, ValueError, KeyError, TypeError):
        # Covers I/O failures, invalid JSON/encoding, missing keys and
        # unexpected value types: treat all of them as "no usable cache".
        return None
    if not episodes:
        return None
    episodes.sort(key=lambda item: item[0])
    return episodes


def _pick_episode_title(episode, epno):
    """Choose the display title for one <episode> element.

    Preference order: English, romanized (x-jat), Japanese, then the first
    title with non-blank text, and finally the placeholder "Episode N".
    """
    lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'
    title_elements = episode.findall('title')
    titles_by_lang = {}
    for title_elem in title_elements:
        lang = title_elem.get(lang_attr)
        if lang and title_elem.text:
            titles_by_lang[lang] = title_elem.text
    for lang in ('en', 'x-jat', 'ja'):
        if lang in titles_by_lang:
            return titles_by_lang[lang]
    for title_elem in title_elements:
        if title_elem.text and title_elem.text.strip():
            return title_elem.text
    return f"Episode {epno}"


def _parse_regular_episodes(root):
    """Extract (epno, title) tuples for regular episodes (epno type="1")."""
    episodes = []
    for episode in root.findall('.//episode'):
        epno_elem = episode.find('epno')
        if epno_elem is None or epno_elem.get('type') != '1':
            continue
        try:
            epno = int(epno_elem.text)
        except (ValueError, TypeError):
            # Non-numeric or empty episode number: skip this entry.
            continue
        episodes.append((epno, _pick_episode_title(episode, epno)))
    return episodes


def fetch_anidb_data_core(folder_path):
    """
    Fetch episode data from AniDB based on folder name pattern {anidbN-X}.

    A cached copy in CACHE_DIR is used when it is younger than
    CACHE_EXPIRY_SECONDS; otherwise the AniDB HTTP API is queried and the
    result is written back to the cache (best effort).

    Returns: (episodes_list, error_message)
    episodes_list is list of tuples sorted by episode number: [(epno, title), ...]
    Exactly one of the two values is None.
    """
    # normpath drops a trailing separator, which would otherwise make
    # basename() return an empty string for paths like "/shows/Name {anidb4-1}/".
    folder_name = os.path.basename(os.path.normpath(folder_path))
    # Extract anidb ID using regex (handles patterns like {anidb4-18874})
    match = re.search(r'\{anidb\d+-(\d+)\}', folder_name)
    if not match:
        return None, "Could not find valid AniDB ID in folder name.\nFolder name should contain pattern like: {anidb4-18874}"
    aid = match.group(1)

    cache_file = os.path.join(CACHE_DIR, f"{aid}.json")
    cached = _load_cached_episodes(cache_file)
    if cached is not None:
        return cached, None

    # Fetch XML from AniDB API
    url = f"http://api.anidb.net:9001/httpapi?request=anime&client=testdesktop&clientver=1&protover=1&aid={aid}"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return None, f"Failed to fetch AniDB data:\n{str(e)}"

    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as e:
        return None, f"Failed to parse AniDB response:\n{str(e)}"

    # The API reports failures (ban, unknown client, bad aid) as an <error>
    # document with HTTP 200; surface its text instead of "no episodes".
    if root.tag == 'error':
        detail = (root.text or '').strip() or 'unknown error'
        return None, f"AniDB API returned an error:\n{detail}"

    episodes = _parse_regular_episodes(root)
    if not episodes:
        return None, "No regular episodes were found in the AniDB response."
    episodes.sort(key=lambda item: item[0])

    # Save to cache; a failure here is not critical because the data is
    # already in memory.
    try:
        os.makedirs(CACHE_DIR, exist_ok=True)
        cache_info = {
            'aid': aid,
            'timestamp': time.time(),
            'episodes': episodes
        }
        with open(cache_file, 'w', encoding='utf-8') as f:
            json.dump(cache_info, f, indent=2)
    except OSError:
        pass
    return episodes, None
def format_titles_from_episodes(episodes):
    """Render (episode number, title) pairs as newline-joined "number<TAB>title" rows."""
    rows = []
    for number, name in episodes:
        rows.append(f"{number}\t{name}")
    return "\n".join(rows)
def format_titles(titles_data):
    """Parse tab-separated titles into a dictionary.

    Each line is expected to look like "<episode number>\\t<title>"; any
    further tab-separated columns are ignored. Lines without a tab, or whose
    first column is not an integer (headers, stray text), are skipped rather
    than aborting the whole parse. Backticks in titles are replaced by
    apostrophes. When an episode number repeats, the last line wins.

    Returns: dict mapping int episode number -> title string.
    """
    titles = {}
    for line in titles_data.strip().split('\n'):
        # Pasted Windows text keeps a '\r' before each '\n'; drop it so it
        # does not end up inside a file name.
        parts = line.rstrip('\r').split('\t')
        if len(parts) < 2:
            continue
        try:
            episode_number = int(parts[0])
        except ValueError:
            continue
        titles[episode_number] = parts[1].replace("`", "'")
    return titles
def is_valid_windows_filename(filename):
    """Check if filename is valid for Windows.

    An empty value is accepted because callers use it to mean "no override".
    A name is rejected when it contains a reserved punctuation character or
    an ASCII control character, ends with a space or a dot, or is a reserved
    device name (CON, PRN, AUX, NUL, COM1-9, LPT1-9), with or without an
    extension and in any letter case.
    """
    if not filename:
        return True  # Empty is valid (means no override)
    # Control characters (0-31) are as illegal as the punctuation set.
    if re.search(r'[<>:"/\\|?*\x00-\x1f]', filename):
        return False
    if filename.endswith((' ', '.')):
        return False
    reserved = {'CON', 'PRN', 'AUX', 'NUL'} | {f'COM{i}' for i in range(1, 10)} | {f'LPT{i}' for i in range(1, 10)}
    # Windows treats "NUL.txt" or "NUL.tar.gz" like "NUL", so compare only
    # the part before the first dot.
    base_name = filename.split('.', 1)[0]
    return base_name.upper() not in reserved
def rename_files_core(folder_path, dry_run, regex_pattern, episode_offset, override_title, episode_group, episode_titles_dic):
    """
    Core renaming logic.

    Every regular file in folder_path whose name matches regex_pattern (via
    re.match, i.e. anchored at the start) is renamed to
    "<series> - EP<NN> - <episode title><ext>".

    Args:
        folder_path: Directory to scan; a missing directory yields [].
        dry_run: When true, compute the new names without touching the disk.
        regex_pattern: Pattern whose group 1 is the series title.
        episode_offset: Integer added to the parsed episode number.
        override_title: Series title to use instead of group 1 (if non-empty).
        episode_group: Index of the group holding the episode number.
        episode_titles_dic: Mapping of episode number -> episode title.

    Returns list of tuples: [(old_filename, new_filename_or_None, status), ...]
    where status is "renamed" (also in dry-run) or starts with "Error".
    Files that do not match the pattern are not listed.

    Raises:
        re.error: if regex_pattern is not a valid regular expression.
    """
    results = []
    if not os.path.isdir(folder_path):
        return results
    # Compile once so an invalid pattern fails before anything is renamed.
    pattern = re.compile(regex_pattern)
    # Targets already handed out in this run; needed to spot collisions in
    # dry-run mode, where nothing appears on disk.
    claimed_targets = set()
    # Sorted for a deterministic order (os.listdir order is arbitrary).
    for filename in sorted(os.listdir(folder_path)):
        full_path = os.path.join(folder_path, filename)
        if not os.path.isfile(full_path):
            continue
        match = pattern.match(filename)
        if not match:
            continue
        try:
            series_title = override_title if override_title else match.group(1)
            episode_number = int(match.group(episode_group)) + episode_offset
            episode_title = episode_titles_dic.get(episode_number, "Unknown Title")
            episode_title = re.sub(r'[\\/:*?"<>|]', '_', episode_title)
            # Keep the file's own extension instead of forcing ".mkv" onto
            # other containers or subtitle files; fall back to ".mkv" only
            # when the file has no extension at all.
            extension = os.path.splitext(filename)[1] or ".mkv"
            new_filename = f"{series_title} - EP{episode_number:02d} - {episode_title}{extension}"
            new_full_path = os.path.join(folder_path, new_filename)
            target_key = os.path.normcase(new_full_path)
            # os.rename silently replaces an existing file on POSIX, so refuse
            # any target that exists (unless it is this very file) or that an
            # earlier file in this run already claimed.
            occupied = os.path.exists(new_full_path) and not os.path.samefile(full_path, new_full_path)
            if occupied or target_key in claimed_targets:
                results.append((filename, None, f"Error: target already exists: {new_filename}"))
                continue
            if not dry_run:
                os.rename(full_path, new_full_path)
            claimed_targets.add(target_key)
            results.append((filename, new_filename, "renamed"))
        except (IndexError, ValueError, TypeError, OSError) as e:
            # IndexError: no such group; ValueError/TypeError: group is not a
            # number or did not participate; OSError: the rename itself failed.
            results.append((filename, None, f"Error: {str(e)}"))
    return results
def save_regex_to_history(pattern, history_file=HISTORY_FILE):
    """Save regex pattern to history file.

    The history is a text file with one pattern per line, newest first,
    capped at 20 entries. Saving is best effort: I/O problems are swallowed
    so that a broken history file never blocks a rename.

    Args:
        pattern: Regex to remember; empty or whitespace-only values and
            patterns containing line breaks are ignored (one line = one entry).
        history_file: Path of the history file.
    """
    # Entries are stripped when read back, so compare and store the stripped
    # form; otherwise a pattern with surrounding whitespace never matches its
    # stored copy and is inserted again on every save.
    pattern = pattern.strip() if pattern else pattern
    if not pattern or '\n' in pattern or '\r' in pattern:
        return
    try:
        with open(history_file, 'r', encoding='utf-8') as f:
            history = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        history = []
    except (OSError, UnicodeDecodeError):
        # Unreadable history: leave it untouched rather than overwrite it.
        return
    # Drop duplicates left behind by earlier runs, keeping first occurrences.
    history = list(dict.fromkeys(history))
    if pattern not in history:
        history.insert(0, pattern)
    try:
        with open(history_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(history[:20]))
    except OSError:
        pass
def load_regex_history(history_file=HISTORY_FILE):
    """Load regex history from file.

    Returns the non-blank lines of history_file, stripped of surrounding
    whitespace, in file order. A missing, unreadable or undecodable file
    yields an empty list, mirroring the best-effort behaviour of saving.
    """
    # Open directly instead of checking os.path.exists first: the file can
    # disappear between the check and the open.
    try:
        with open(history_file, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f if line.strip()]
    except (OSError, UnicodeDecodeError):
        return []
def list_regex_history_cli(history_file):
    """Print the saved regex patterns with their indices; always returns 0.

    Patterns longer than 60 characters are shortened to 57 characters plus
    "..." for display. The first entry is tagged "(default)" when it equals
    the built-in SubsPlease pattern.
    """
    builtin_pattern = r'\[SubsPlease\] (.+?) - (\d{2}) \(1080p\) \[\w+\]\.mkv'
    rule = "-" * 60
    patterns = load_regex_history(history_file)
    if patterns:
        print("Saved regex patterns (use --regex-index N to select):")
        print(rule)
        for index, entry in enumerate(patterns):
            if len(entry) > 60:
                shown = entry[:57] + "..."
            else:
                shown = entry
            if index == 0 and entry == builtin_pattern:
                suffix = " (default)"
            else:
                suffix = ""
            print(f" [{index}] {shown}{suffix}")
        print(rule)
        print(f"Total: {len(patterns)} patterns")
    else:
        print("No regex history found.")
    return 0
# ==============================================================================
# GUI CLASS (LAZY IMPORT)
# ==============================================================================
class RenamerGUI:
    """Tkinter front end for the renamer.

    tkinter is imported inside __init__ (and kept on the instance) so that
    importing this module or running the CLI does not require a GUI toolkit.
    """

    def __init__(self, history_file=HISTORY_FILE):
        """Create the main window and build all widgets.

        Args:
            history_file: Path of the regex history file used to fill the
                pattern combobox and to store newly used patterns.
        """
        # Import tkinter only when GUI is actually instantiated
        import tkinter as tk
        from tkinter import filedialog, scrolledtext, ttk, messagebox
        self.tk = tk
        self.filedialog = filedialog
        self.scrolledtext = scrolledtext
        self.ttk = ttk
        self.messagebox = messagebox
        self.history_file = history_file
        self.root = tk.Tk()
        self.root.title("File Renamer")
        self.root.geometry("850x650")
        self.style = ttk.Style()
        # Style applied to the override entry while its content is invalid.
        self.style.configure("Invalid.TEntry", fieldbackground="pink")
        self._build_ui()

    def _build_ui(self):
        """Create the "Main" and "Options" tabs and lay out their widgets."""
        tk = self.tk
        ttk = self.ttk
        notebook = ttk.Notebook(self.root)
        notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
        main_frame = ttk.Frame(notebook, padding="10")
        options_frame = ttk.Frame(notebook, padding="10")
        notebook.add(main_frame, text="Main")
        notebook.add(options_frame, text="Options")
        # Main Tab
        ttk.Label(main_frame, text="Folder Path:").grid(column=0, row=0, sticky=tk.W, pady=2)
        self.folder_entry = ttk.Entry(main_frame)
        self.folder_entry.grid(column=1, row=0, sticky="ew", padx=5, pady=2)
        ttk.Button(main_frame, text="Browse", command=self.select_folder).grid(column=2, row=0, sticky=tk.W, padx=5, pady=2)
        ttk.Button(main_frame, text="Fetch from AniDB", command=self.fetch_anidb_data).grid(column=3, row=0, sticky=tk.W, padx=5, pady=2)
        ttk.Label(main_frame, text="Regex Pattern:").grid(column=0, row=1, sticky=tk.W, pady=2)
        self.regex_entry = ttk.Combobox(main_frame, values=load_regex_history(self.history_file))
        self.regex_entry.grid(column=1, row=1, columnspan=3, sticky="ew", padx=5, pady=2)
        self.regex_entry.set(r'\[SubsPlease\] (.+?) - (\d{2}) \(1080p\) \[\w+\]\.mkv')
        ttk.Label(main_frame, text="Episode Titles:").grid(column=0, row=2, sticky=tk.W, pady=2)
        self.titles_text = self.scrolledtext.ScrolledText(main_frame, height=8)
        self.titles_text.grid(column=0, row=3, columnspan=4, sticky="nsew", padx=5, pady=2)
        # Dry run is the default so that nothing is renamed by accident.
        self.dry_run_var = tk.BooleanVar(value=True)
        ttk.Checkbutton(main_frame, text="Dry Run", variable=self.dry_run_var).grid(column=0, row=4, sticky=tk.W, pady=2)
        ttk.Button(main_frame, text="Rename Files", command=self.start_renaming).grid(column=3, row=4, sticky=tk.E, pady=2)
        ttk.Label(main_frame, text="Log:").grid(column=0, row=5, sticky=tk.W, pady=2)
        self.log_text = self.scrolledtext.ScrolledText(main_frame)
        self.log_text.grid(column=0, row=6, columnspan=4, sticky="nsew", padx=5, pady=2)
        # Options Tab
        ttk.Label(options_frame, text="Episode Number Offset:").grid(column=0, row=0, sticky=tk.W, pady=2)
        self.offset_entry = ttk.Entry(options_frame, width=10)
        self.offset_entry.grid(column=1, row=0, sticky=tk.W, padx=5, pady=2)
        self.offset_entry.insert(0, "0")
        self.override_label = ttk.Label(options_frame, text="Override Show Title:")
        self.override_label.grid(column=0, row=1, sticky=tk.W, pady=2)
        self.override_entry = ttk.Entry(options_frame)
        self.override_entry.grid(column=1, row=1, sticky="ew", padx=5, pady=2)
        # Re-validate the override title after every key press.
        self.override_entry.bind("<KeyRelease>", self.validate_override_title)
        ttk.Label(options_frame, text="Episode Number Regex Group:").grid(column=0, row=2, sticky=tk.W, pady=2)
        self.episode_group_entry = ttk.Entry(options_frame, width=10)
        self.episode_group_entry.grid(column=1, row=2, sticky=tk.W, padx=5, pady=2)
        self.episode_group_entry.insert(0, "2")
        # Expandable configuration: only the entry column and the two text
        # areas absorb extra window space.
        for i in range(4):
            main_frame.columnconfigure(i, weight=1 if i == 1 else 0)
        main_frame.rowconfigure(3, weight=1)
        main_frame.rowconfigure(6, weight=2)
        options_frame.columnconfigure(1, weight=1)
        instructions = ttk.Label(main_frame, text="Note: Folder name must contain pattern {anidb4-12345} to fetch from AniDB",
                                 font=("Arial", 8), foreground="gray")
        instructions.grid(column=0, row=7, columnspan=4, sticky=tk.W, pady=(5,0))

    def validate_override_title(self, event=None):
        """Highlight the override entry and relabel it when the title is invalid."""
        title = self.override_entry.get()
        valid = is_valid_windows_filename(title)
        self.override_entry.config(style="TEntry" if valid else "Invalid.TEntry")
        self.override_label.config(
            text="Override Show Title:" if valid else "Invalid Windows filename!",
            foreground="black" if valid else "red"
        )

    def select_folder(self):
        """Let the user pick a directory and put it into the folder entry."""
        folder_path = self.filedialog.askdirectory()
        if folder_path:
            self.folder_entry.delete(0, self.tk.END)
            self.folder_entry.insert(0, folder_path)

    def fetch_anidb_data(self):
        """Fetch AniDB data and populate the titles text area."""
        folder_path = self.folder_entry.get()
        if not folder_path:
            self.messagebox.showerror("Error", "Please select a folder first.")
            return
        episodes, error = fetch_anidb_data_core(folder_path)
        if error:
            self.messagebox.showerror("Error", error)
            return
        formatted_titles = format_titles_from_episodes(episodes)
        self.titles_text.delete("1.0", self.tk.END)
        self.titles_text.insert(self.tk.END, formatted_titles)
        self.log_message(f"Successfully loaded {len(episodes)} episodes from AniDB")

    def log_message(self, message):
        """Append one line to the log area and scroll it into view."""
        self.log_text.insert(self.tk.END, message + "\n")
        self.log_text.see(self.tk.END)

    def start_renaming(self):
        """Validate the form, run the rename (or dry run) and log the outcome."""
        folder_path = self.folder_entry.get()
        dry_run = self.dry_run_var.get()
        regex_pattern = self.regex_entry.get()
        try:
            episode_offset = int(self.offset_entry.get())
            episode_group = int(self.episode_group_entry.get())
        except ValueError:
            self.messagebox.showerror("Error", "Offset and Episode Group must be integers.")
            return
        override_title = self.override_entry.get()
        if not folder_path:
            self.messagebox.showerror("Error", "Please select a folder.")
            return
        if override_title and not is_valid_windows_filename(override_title):
            self.messagebox.showerror("Error", "The override title is not a valid Windows filename.")
            return
        # Reject a broken pattern up front so it is neither stored in the
        # history nor allowed to raise in the middle of the rename.
        try:
            re.compile(regex_pattern)
        except re.error as exc:
            self.messagebox.showerror("Error", f"Invalid regex pattern:\n{exc}")
            return
        save_regex_to_history(regex_pattern, self.history_file)
        self.regex_entry['values'] = load_regex_history(self.history_file)
        # Get titles from text widget
        titles_data = self.titles_text.get("1.0", self.tk.END)
        try:
            episode_titles_dic = format_titles(titles_data)
        except ValueError as exc:
            self.messagebox.showerror("Error", f"Could not parse the episode titles:\n{exc}")
            return
        # Redirect stdout to capture print statements from core function.
        # The finally clause guarantees stdout is restored even if the core
        # raises; otherwise all later output of the process would vanish
        # into the StringIO.
        captured = io.StringIO()
        old_stdout = sys.stdout
        sys.stdout = captured
        failure = None
        results = []
        try:
            results = rename_files_core(
                folder_path, dry_run, regex_pattern, episode_offset,
                override_title, episode_group, episode_titles_dic
            )
        except (OSError, re.error) as exc:
            failure = str(exc)
        finally:
            sys.stdout = old_stdout
        output = captured.getvalue()
        # Display results
        self.log_text.delete("1.0", self.tk.END)
        self.log_text.insert(self.tk.END, output)
        if failure is not None:
            self.log_message(f"[ERROR] Renaming aborted: {failure}")
            return
        for old_name, new_name, status in results:
            if new_name:
                self.log_message(f"{'[DRY-RUN] Would rename' if dry_run else 'Renamed'}: '{old_name}' -> '{new_name}'")
            elif status.startswith("Error"):
                self.log_message(f"[ERROR] {old_name}: {status}")
        if not results:
            self.log_message("No files matched the pattern.")

    def run(self):
        """Enter the Tk main loop; returns when the window is closed."""
        self.root.mainloop()
# ==============================================================================
# CLI INTERFACE
# ==============================================================================
def run_cli():
    """Command-line entry point.

    Parses sys.argv, fetches the episode titles from AniDB for the folder
    given with -i and renames (or, by default, only previews) the matching
    files. Exits via sys.exit with status 1 on any validation, fetch or
    rename-level failure; --list-regex prints the history and exits with the
    status returned by list_regex_history_cli.
    """
    parser = argparse.ArgumentParser(
        description='Rename anime files based on AniDB data. CLI mode automatically fetches titles from AniDB.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # GUI Mode
  python %(prog)s
  # List saved regex patterns
  python %(prog)s -l
  # Use regex from history index 2
  python %(prog)s -i /path/to/folder -x 2
  # Dry-run with specific regex
  python %(prog)s -i /path/to/folder -r "pattern"
  # Actually rename files (execute mode)
  python %(prog)s -i /path/to/folder --execute
"""
    )
    # Input/Action arguments
    parser.add_argument('-i', '--input', metavar='PATH',
                        help='Input folder path (must contain {anidbN-XXXX} in folder name)')
    parser.add_argument('-e', '--execute', action='store_true',
                        help='Actually rename files (default is dry-run)')
    parser.add_argument('-l', '--list-regex', action='store_true',
                        help='List saved regex patterns with indices and exit')
    # Regex selection: argparse rejects -r and -x given together. The
    # default of -r does not count as "given", so -x alone is accepted.
    regex_group = parser.add_mutually_exclusive_group()
    regex_group.add_argument('-r', '--regex',
                             default=r'\[SubsPlease\] (.+?) - (\d{2}) \(1080p\) \[\w+\]\.mkv',
                             help='Regex pattern with capture groups (default: SubsPlease pattern)')
    regex_group.add_argument('-x', '--regex-index', type=int, metavar='N',
                             help='Use regex from history at index N (see --list-regex)')
    # Other options
    parser.add_argument('-o', '--offset', type=int, default=0,
                        help='Episode number offset (default: 0)')
    parser.add_argument('-t', '--override-title', default='',
                        help='Override series title (optional)')
    parser.add_argument('-g', '--episode-group', type=int, default=2,
                        help='Regex group index for episode number (default: 2)')
    parser.add_argument('--history-file', default=HISTORY_FILE,
                        help='Path to regex history file')
    parser.add_argument('--no-color', action='store_true',
                        help='Disable colored output')
    args = parser.parse_args()
    # Handle list-regex first (doesn't require -i)
    if args.list_regex:
        sys.exit(list_regex_history_cli(args.history_file))
    # Validate that -i is provided for actual operations
    if not args.input:
        parser.error("the following arguments are required: -i/--input (unless using --list-regex)")
    # Colors for terminal output (ANSI codes are not emitted on Windows)
    if args.no_color or os.name == 'nt':
        RED = GREEN = YELLOW = CYAN = RESET = ''
    else:
        RED = '\033[91m'
        GREEN = '\033[92m'
        YELLOW = '\033[93m'
        CYAN = '\033[96m'
        RESET = '\033[0m'
    # Determine which regex to use
    regex_pattern = args.regex
    if args.regex_index is not None:
        history = load_regex_history(args.history_file)
        if not history:
            print(f"{RED}Error: History file is empty or not found: {args.history_file}{RESET}", file=sys.stderr)
            sys.exit(1)
        if args.regex_index < 0 or args.regex_index >= len(history):
            print(f"{RED}Error: Regex index {args.regex_index} out of range (0-{len(history)-1}){RESET}", file=sys.stderr)
            sys.exit(1)
        regex_pattern = history[args.regex_index]
        print(f"{CYAN}Using regex from history [{args.regex_index}]:{RESET} {regex_pattern}")
    else:
        print(f"{CYAN}Using regex:{RESET} {regex_pattern}")
    # Fail with a readable message instead of a traceback on a broken pattern.
    try:
        re.compile(regex_pattern)
    except re.error as exc:
        print(f"{RED}Error: Invalid regex pattern: {exc}{RESET}", file=sys.stderr)
        sys.exit(1)
    # Validate inputs
    if not os.path.isdir(args.input):
        print(f"{RED}Error: Folder not found: {args.input}{RESET}", file=sys.stderr)
        sys.exit(1)
    # normpath removes a trailing separator (as added by shell completion);
    # without it basename() would return '' and the check below would fail.
    input_path = os.path.normpath(args.input)
    # Check for AniDB pattern in folder name
    folder_name = os.path.basename(input_path)
    if not re.search(r'\{anidb\d+-\d+\}', folder_name):
        print(f"{RED}Error: Folder name must contain AniDB pattern like {{anidb4-12345}}{RESET}", file=sys.stderr)
        sys.exit(1)
    if args.override_title and not is_valid_windows_filename(args.override_title):
        print(f"{RED}Error: Override title contains invalid characters for Windows filenames{RESET}", file=sys.stderr)
        sys.exit(1)
    # Step 1: Fetch AniDB data (Required in CLI mode)
    print(f"{YELLOW}Fetching AniDB data for:{RESET} {folder_name}")
    episodes, error = fetch_anidb_data_core(input_path)
    if error:
        print(f"{RED}Error fetching AniDB data: {error}{RESET}", file=sys.stderr)
        sys.exit(1)
    print(f"{GREEN}Successfully fetched {len(episodes)} episodes from AniDB{RESET}")
    # Convert to dictionary for renaming (the round trip through the text
    # format applies the same title normalisation as the GUI path)
    titles_data = format_titles_from_episodes(episodes)
    episode_titles_dic = format_titles(titles_data)
    # Display fetched titles (compact view)
    print(f"\n{YELLOW}Episode mapping:{RESET}")
    for epno, title in episodes[:5]: # Show first 5
        print(f" EP{epno:02d}: {title}")
    if len(episodes) > 5:
        print(f" ... and {len(episodes) - 5} more")
    # Step 2: Execute rename
    dry_run = not args.execute
    print()
    if dry_run:
        print(f"{YELLOW}=== DRY-RUN MODE (use --execute to actually rename) ==={RESET}")
    else:
        print(f"{YELLOW}=== EXECUTING RENAME ==={RESET}")
    try:
        results = rename_files_core(
            input_path,
            dry_run,
            regex_pattern,
            args.offset,
            args.override_title,
            args.episode_group,
            episode_titles_dic
        )
    except OSError as exc:
        print(f"{RED}Error: Renaming aborted: {exc}{RESET}", file=sys.stderr)
        sys.exit(1)
    print()
    renamed_count = 0
    error_count = 0
    for old_name, new_name, status in results:
        if new_name:
            action = f"{YELLOW}[DRY-RUN]{RESET}" if dry_run else f"{GREEN}[OK]{RESET}"
            print(f"{action} {old_name} -> {new_name}")
            renamed_count += 1
        elif status.startswith("Error"):
            print(f"{RED}[ERROR]{RESET} {old_name}: {status}")
            error_count += 1
    # results only lists files that matched the pattern, so an empty list
    # means "nothing matched", while a non-empty list without any new name
    # means every matched file failed.
    if not results:
        print(f"{YELLOW}Warning: No files matched the regex pattern{RESET}")
    elif renamed_count == 0:
        print(f"{YELLOW}Warning: Files matched the regex pattern, but none could be renamed{RESET}")
    # Save regex to history (the one we actually used)
    save_regex_to_history(regex_pattern, args.history_file)
    print()
    if dry_run and renamed_count > 0:
        print(f"{YELLOW}Dry-run complete. {renamed_count} files would be renamed.{RESET}")
        print(f"Run with {CYAN}--execute{RESET} to confirm.")
    elif not dry_run:
        print(f"{GREEN}Success: {renamed_count} files renamed{RESET}")
    if error_count:
        print(f"{RED}{error_count} file(s) could not be processed (see [ERROR] lines above){RESET}")
# ==============================================================================
# MAIN ENTRY POINT
# ==============================================================================
if __name__ == "__main__":
    # A bare invocation (script name only) opens the GUI; any extra
    # command-line argument selects the CLI.
    if len(sys.argv) <= 1:
        # tkinter is imported inside RenamerGUI.__init__, i.e. only on this path.
        app = RenamerGUI()
        app.run()
    else:
        run_cli()