diff --git a/.gitignore b/.gitignore index f8abaae..55b6a9b 100644 --- a/.gitignore +++ b/.gitignore @@ -51,4 +51,9 @@ build/ # Exclude build scripts and README build-and-deploy.sh -build-and-deploy.ps1 \ No newline at end of file +build-and-deploy.ps1 + +# Claude AI specific files +.claude +CLAUDE.md +PR_DESCRIPTION.md \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..b92a607 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,180 @@ +# Changelog + +## [Unreleased] - 2025-07-05 + +### Added + +#### Radarr/Sonarr Integration +- ✅ Created Radarr API client module (`internal/radarr/`) with full API support + - Movie search by title, year, TMDb ID, IMDb ID, and file path + - Automatic TMDb ID extraction from Radarr database + - Connection testing and system status endpoints + +- ✅ Created Sonarr API client module (`internal/sonarr/`) with full API support + - TV series search by title, year, TMDb ID, TVDb ID, IMDb ID, and file path + - Episode fetching and file path matching + - Connection testing and system status endpoints + +- ✅ Updated configuration system to support Radarr/Sonarr + - Added `USE_RADARR` and `USE_SONARR` environment variables + - Added `RADARR_URL`, `RADARR_API_KEY`, `SONARR_URL`, `SONARR_API_KEY` configuration + - Added validation for Radarr/Sonarr settings when enabled + +- ✅ Enhanced TMDb ID extraction to use multiple sources + - Primary: Plex metadata (existing functionality preserved) + - Secondary: Radarr/Sonarr API matching (new) + - Fallback: File path regex matching (existing functionality preserved) + - Added source tracking to show where TMDb ID was found + +- ✅ Updated media processor to integrate with Radarr/Sonarr + - Modified `extractMovieTMDbID` to query Radarr when enabled + - Modified `extractTVShowTMDbID` to query Sonarr when enabled + - Added multiple matching strategies: title/year, IMDb ID, TVDb ID, file path + +- ✅ Updated main application to initialize Radarr/Sonarr clients + - 
Added connection testing on startup + - Graceful handling when Radarr/Sonarr are not configured + +- ✅ Created comprehensive docker-compose.yml example + - Includes all existing configuration options + - Added Radarr/Sonarr configuration examples with defaults + +#### Verbose Logging & Debugging + +- ✅ Added verbose logging feature + - New `VERBOSE_LOGGING` environment variable (default: false) + - Shows detailed TMDb ID lookup process for each item + - Displays all available Plex GUIDs (IMDb, TMDb, TVDb) + - Shows Radarr lookup attempts with title, file path, and IMDb ID matching + - Shows Sonarr lookup attempts with title, TVDb ID, IMDb ID, and file path matching + - Indicates source of successful TMDb ID matches + - Helps troubleshoot matching issues + +- ✅ Added progress tracking for large libraries + - Shows percentage progress for libraries with >100 items + - Displays current processing status + - Shows summary of skipped items in verbose mode + +- ✅ Enhanced label/genre application logging + - Shows when keywords are being applied to Plex + - Displays Plex API call timing in verbose mode + - Shows current and new keywords being merged + - Confirms successful application to Plex + +#### Persistent Storage + +- ✅ Added persistent storage for processed items + - Prevents reprocessing items after container restarts + - JSON file-based storage with atomic writes + - Tracks which field (label/genre) was updated for each item + - Configurable data directory via DATA_DIR environment variable + - Docker volume support for data persistence + - Storage directory defaults to `/data` in container + +#### Error Handling & Connection Testing + +- ✅ Added TMDb API connection testing on startup + - Validates API token before processing begins + - Provides clear error messages for authentication failures + - Shows detailed error responses for debugging + +- ✅ Improved error handling throughout + - Better error messages for TMDb API failures + - Clear indication of 
authentication vs other errors + - Verbose mode shows why items are skipped + +### Changed + +- Modified `NewProcessor` to accept optional Radarr/Sonarr clients and return error +- Enhanced TMDb ID detection to show source (Plex metadata, Radarr, Sonarr, or file path) +- Processor initialization now includes persistent storage setup +- Main application now tests all API connections on startup + +### Documentation + +- ✅ Updated README.md with comprehensive documentation + - Added Radarr/Sonarr Integration section with benefits and configuration + - Added Verbose Logging section with examples + - Updated environment variables documentation + - Added persistent storage information + - Updated docker-compose examples + +- ✅ Created detailed CHANGELOG.md + - Comprehensive list of all changes + - Organized by feature area + - Technical implementation details + +#### Keyword Normalization + +- ✅ Added intelligent keyword normalization feature + - Automatically normalizes TMDb keywords for consistent formatting + - Pattern-based recognition for dynamic handling without hardcoding + - Smart title casing with proper article and preposition handling + - Automatic duplicate removal after normalization + +- ✅ Pattern Recognition Features + - **Critical Replacements**: Known abbreviations (sci-fi → Sci-Fi, romcom → Romantic Comedy) + - **Acronym Detection**: Automatically uppercases known acronyms (FBI, CIA, DEA, etc.) 
+ - **Agency Patterns**: Detects agency roles (dea agent → DEA Agent) + - **Parenthetical Acronyms**: Handles acronyms in parentheses (central intelligence agency (cia) → Central Intelligence Agency (CIA)) + - **Century Patterns**: Properly formats centuries (5th century bc → 5th Century BC) + - **City/State Patterns**: Handles location formatting (san francisco, california → San Francisco, California) + - **Relationship Patterns**: Adds "Relationship" where appropriate (father daughter → Father Daughter Relationship) + - **Credit Stinger Terms**: Expands compound terms (duringcreditsstinger → During Credits Stinger) + +- ✅ Added comprehensive test suite + - 90+ test cases covering various normalization scenarios + - Tests for edge cases, mixed case preservation, and pattern matching + - Ensures consistent behavior across different keyword types + +- ✅ Smart duplicate cleaning functionality + - Automatically removes old unnormalized keywords when adding normalized versions + - Preserves manually set keywords in Plex + - Prevents accumulation of duplicate keywords (e.g., both "sci-fi" and "Sci-Fi") + - Shows cleaning activity in verbose logging mode + +#### Force Update Mode + +- ✅ Added force update functionality + - New `FORCE_UPDATE` environment variable (default: false) + - Reprocesses all items regardless of previous processing status + - Useful for applying keyword normalization to existing libraries + - Shows clear indication when force update mode is active + - Bypasses both storage checks and "already has keywords" logic + +### Changed + +- Modified `NewProcessor` to accept optional Radarr/Sonarr clients and return error +- Enhanced TMDb ID detection to show source (Plex metadata, Radarr, Sonarr, or file path) +- Processor initialization now includes persistent storage setup +- Main application now tests all API connections on startup +- TMDb client now normalizes all keywords before returning them +- Updated keyword display to show normalization in verbose 
mode +- Enhanced keyword synchronization with smart duplicate cleaning +- Force update mode bypasses all previous processing checks + +### Documentation + +- ✅ Updated README.md with comprehensive documentation + - Added Radarr/Sonarr Integration section with benefits and configuration + - Added Verbose Logging section with examples + - Added Keyword Normalization section with pattern examples + - Added Force Update Mode section with use cases and examples + - Added Smart Duplicate Cleaning documentation + - Updated environment variables documentation + - Added persistent storage information + - Updated docker-compose examples + +- ✅ Created detailed CHANGELOG.md + - Comprehensive list of all changes + - Organized by feature area + - Technical implementation details + +### Technical Details +- Radarr/Sonarr clients use API v3 endpoints +- Implemented robust error handling and fallback mechanisms +- No breaking changes - Radarr/Sonarr integration is fully optional +- Maintains backward compatibility with existing file path matching +- Verbose logging provides detailed insights without affecting normal operation +- Keyword normalization uses regex patterns for scalability +- All features are designed to be non-breaking and backward compatible \ No newline at end of file diff --git a/README.md b/README.md index 2ac9064..c32dad7 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ **Automatically sync TMDb keywords as Plex labels or genres for movies and TV shows** Lightweight Docker container that bridges Plex with The Movie Database, adding searchable keywords to your media. +> **🔀 Fork Notice**: This is an enhanced fork of the original [Labelarr](https://github.com/Buttercup2k/Labelarr) project with new features including Radarr/Sonarr integration, persistent storage, verbose logging, and intelligent keyword normalization. 
+ ## 🚀 Quick Start ### Docker Compose (Recommended) @@ -35,17 +37,35 @@ services: # Optional settings - PROCESS_TIMER=1h - UPDATE_FIELD=label # or 'genre' + # Optional Radarr/Sonarr integration + # - USE_RADARR=true + # - RADARR_URL=http://radarr:7878 + # - RADARR_API_KEY=your_radarr_api_key + # - USE_SONARR=true + # - SONARR_URL=http://sonarr:8989 + # - SONARR_API_KEY=your_sonarr_api_key ``` **Run:** `docker-compose up -d` ### What it does -✅ **Detects TMDb IDs** from Plex metadata or file paths (e.g., `{tmdb-12345}`) +✅ **Detects TMDb IDs** from Plex metadata, Radarr/Sonarr APIs, or file paths (e.g., `{tmdb-12345}`) ✅ **Fetches keywords** from TMDb API for movies and TV shows +✅ **Normalizes keywords** with proper capitalization and spelling ✅ **Adds as Plex labels/genres** - never removes existing values ✅ **Runs automatically** on configurable timer (default: 1 hour) -✅ **Multi-architecture** support (AMD64 + ARM64) +✅ **Multi-architecture** support (AMD64 + ARM64) + +### 🎉 New Features in This Fork + +- **🚀 Radarr/Sonarr Integration** - Automatically detect TMDb IDs from your media managers +- **💾 Persistent Storage** - Tracks processed items across container restarts +- **🔍 Verbose Logging** - Detailed debugging information for troubleshooting +- **📝 Keyword Normalization** - Intelligent formatting with pattern recognition +- **🔄 Force Update Mode** - Reprocess all items regardless of previous processing status +- **🧹 Smart Duplicate Cleaning** - Automatically removes old unnormalized keywords when adding normalized versions +- **🔒 Enhanced Error Handling** - Better authentication and connection testing --- @@ -128,6 +148,21 @@ services: - `UPDATE_FIELD=label` - Field to update: `label` or `genre` (default: `label`) - `PROCESS_TIMER=1h` - How often to run 24h, 5m, 2h30m etc. 
(default: `1h`) - `REMOVE=lock` - Clean mode: `lock` or `unlock` (runs once and exits) +- `VERBOSE_LOGGING=true` - Enable detailed lookup information (default: `false`) +- `DATA_DIR=/data` - Directory for persistent storage (default: `/data`) +- `FORCE_UPDATE=true` - Force reprocess all items regardless of previous processing (default: `false`) + +**Radarr Integration (Optional):** + +- `USE_RADARR=true` - Enable Radarr integration (default: `false`) +- `RADARR_URL=http://localhost:7878` - Your Radarr instance URL +- `RADARR_API_KEY=your_api_key` - Your Radarr API key + +**Sonarr Integration (Optional):** + +- `USE_SONARR=true` - Enable Sonarr integration (default: `false`) +- `SONARR_URL=http://localhost:8989` - Your Sonarr instance URL +- `SONARR_API_KEY=your_api_key` - Your Sonarr API key @@ -144,12 +179,67 @@ services: +
+

🚀 Radarr/Sonarr Integration

+ +Labelarr now supports automatic TMDb ID detection through Radarr and Sonarr APIs, eliminating the need for TMDb IDs in file paths! + +### Benefits + +- ✅ **No file renaming required** - Works with your existing file structure +- ✅ **Multiple matching methods** - Title, year, IMDb ID, TVDb ID, file path +- ✅ **Automatic fallback** - If Radarr/Sonarr doesn't have the item, falls back to file path detection +- ✅ **Optional integration** - Enable only if you use Radarr/Sonarr + +### How It Works + +1. **For Movies (Radarr)**: + - Matches by title and year + - Falls back to IMDb ID from Plex + - Checks file paths against Radarr's database + - Extracts TMDb ID from matched movie + +2. **For TV Shows (Sonarr)**: + - Matches by title and year + - Uses TVDb ID from Plex if available + - Falls back to IMDb ID + - Checks episode file paths against Sonarr's database + - Extracts TMDb ID from matched series + +### Configuration Example + +```yaml +services: + labelarr: + image: ghcr.io/nullable-eth/labelarr:latest + environment: + # ... other config ... + + # Enable Radarr integration + - USE_RADARR=true + - RADARR_URL=http://radarr:7878 + - RADARR_API_KEY=your_radarr_api_key + + # Enable Sonarr integration + - USE_SONARR=true + - SONARR_URL=http://sonarr:8989 + - SONARR_API_KEY=your_sonarr_api_key +``` + +### Finding Your API Keys + +**Radarr**: Settings → General → Security → API Key +**Sonarr**: Settings → General → Security → API Key + +
+

🔍 TMDb ID Detection

The application can find TMDb IDs from multiple sources and supports flexible formats: - **Plex Metadata**: Standard TMDb agent IDs +- **Radarr/Sonarr APIs**: Automatic matching (when enabled) - **File Paths**: Flexible TMDb ID detection in filenames or directory names ### ✅ **Supported Patterns** (Case-Insensitive) @@ -422,6 +512,159 @@ In Plex Web UI, you'll see:
+
+

🔍 Verbose Logging

+ +Enable verbose logging to see detailed information about TMDb ID lookups and matching attempts. + +### What it shows + +When `VERBOSE_LOGGING=true`, you'll see: + +- 📋 All available Plex GUIDs for each item +- 🎬 Radarr lookup attempts (title, file path, IMDb ID) +- 📺 Sonarr lookup attempts (title, TVDb ID, IMDb ID, file paths) +- 📁 File path pattern matching attempts +- ✅ Successful matches with source information +- ❌ Failed lookup attempts with reasons + +### Example Output + +``` +🔍 Starting TMDb ID lookup for movie: The Matrix (1999) + 📋 Available Plex GUIDs: + - imdb://tt0133093 + - tmdb://603 + ✅ Found TMDb ID in Plex metadata: 603 + +🔍 Starting TMDb ID lookup for movie: Inception (2010) + 📋 Available Plex GUIDs: + - imdb://tt1375666 + 🎬 Checking Radarr for movie match... + → Searching by title: "Inception" year: 2010 + ✅ Found match in Radarr: Inception (TMDb: 27205) + +🔍 Starting TMDb ID lookup for TV show: Breaking Bad (2008) + 📋 Available Plex GUIDs: + - tvdb://81189 + - imdb://tt0903747 + 📺 Checking Sonarr for series match... + → Searching by title: "Breaking Bad" year: 2008 + ❌ No match found by title/year + → Searching by TVDb ID: 81189 + ✅ Found match by TVDb ID: Breaking Bad (TMDb: 1396) +``` + +### Configuration + +```yaml +environment: + - VERBOSE_LOGGING=true +``` + +This is especially useful for: +- Troubleshooting why certain items aren't being matched +- Understanding which data source provided the TMDb ID +- Debugging Radarr/Sonarr integration issues + +
+ +
+

📝 Keyword Normalization

+ +Labelarr automatically normalizes keywords from TMDb using intelligent pattern recognition and proper capitalization rules. + +### How it works + +- **Smart Title Casing**: Proper capitalization with article/preposition handling +- **Acronym Recognition**: Automatically detects "fbi" → "FBI", "usa" → "USA" +- **Pattern-Based Rules**: Dynamic handling of common patterns without hardcoding every keyword +- **Critical Replacements**: Known abbreviations like "sci-fi" → "Sci-Fi", "romcom" → "Romantic Comedy" +- **Intelligent Patterns**: Recognizes relationships, locations, decades, and compound terms +- **Duplicate Removal**: Removes duplicates after normalization + +### Examples + +**Before normalization:** +``` +sci-fi, action, fbi, based on novel, time travel, woman in peril +``` + +**After normalization:** +``` +Sci-Fi, Action, FBI, Based on Novel, Time Travel, Woman in Peril +``` + +### Pattern Recognition Examples + +- **Critical Replacements**: `sci-fi`, `scifi`, `sci fi` → `Sci-Fi` +- **Relationships**: `father daughter` → `Father Daughter Relationship` +- **Locations**: `san francisco, california` → `San Francisco, California` +- **Versus Patterns**: `man vs nature` → `Man vs Nature` +- **Based On**: `based on novel` → `Based on Novel` +- **Decades**: `1940s` → `1940s` (preserved) +- **Ethnicity**: `african american lead` → `African American Lead` +- **General Terms**: Any multi-word keyword gets proper title casing + +### Smart Duplicate Cleaning + +Labelarr automatically cleans up duplicate keywords when applying normalization: + +- **Removes old versions**: If you have "sci-fi" and we add "Sci-Fi", the old version is removed +- **Preserves manual keywords**: Custom tags you've added manually are always kept +- **Handles complex patterns**: Works with all normalization patterns (agencies, centuries, etc.) 
+ +### Verbose Logging + +With `VERBOSE_LOGGING=true`, you'll see normalization and cleaning in action: +``` +📝 Normalized: "sci-fi" → "Sci-Fi" +📝 Normalized: "fbi" → "FBI" +📝 Normalized: "based on novel" → "Based on Novel" +🧹 Cleaned 2 duplicate/unnormalized keywords +``` + +
+ +
+

🔄 Force Update Mode

+ +Use force update mode to reprocess all items in your library, regardless of whether they've been processed before. This is especially useful after implementing keyword normalization or when you want to refresh all metadata. + +### When to use Force Update + +- **After enabling keyword normalization** - Update existing keywords with proper formatting +- **Configuration changes** - When switching between label/genre fields +- **Keyword cleanup** - Refresh all TMDb keywords with latest data +- **Initial migration** - When moving from another labeling system + +### Configuration + +```yaml +environment: + - FORCE_UPDATE=true +``` + +### What it does + +When `FORCE_UPDATE=true`: +- ✅ Processes all items regardless of previous processing status +- ✅ Reapplies keywords even if they already exist +- ✅ Updates storage with latest processing information +- ✅ Shows "FORCE UPDATE MODE" message in logs + +### Example Output + +``` +✅ Found 1250 movies in library +🔄 FORCE UPDATE MODE: All items will be reprocessed regardless of previous processing +⏳ Processing movies... +``` + +**⚠️ Note**: Force update will reprocess your entire library, which may take time for large collections. Consider running with `VERBOSE_LOGGING=true` to monitor progress. + +
+

🔑 Getting API Keys

diff --git a/cmd/labelarr/main.go b/cmd/labelarr/main.go index e0f9ad7..7240937 100644 --- a/cmd/labelarr/main.go +++ b/cmd/labelarr/main.go @@ -8,6 +8,8 @@ import ( "github.com/nullable-eth/labelarr/internal/config" "github.com/nullable-eth/labelarr/internal/media" "github.com/nullable-eth/labelarr/internal/plex" + "github.com/nullable-eth/labelarr/internal/radarr" + "github.com/nullable-eth/labelarr/internal/sonarr" "github.com/nullable-eth/labelarr/internal/tmdb" ) @@ -24,9 +26,42 @@ func main() { // Initialize clients plexClient := plex.NewClient(cfg) tmdbClient := tmdb.NewClient(cfg) + + // Test TMDb connection + if err := tmdbClient.TestConnection(); err != nil { + fmt.Printf("❌ Failed to connect to TMDb: %v\n", err) + os.Exit(1) + } + fmt.Println("✅ Successfully connected to TMDb") + + // Initialize Radarr client if enabled + var radarrClient *radarr.Client + if cfg.UseRadarr { + radarrClient = radarr.NewClient(cfg.RadarrURL, cfg.RadarrAPIKey) + if err := radarrClient.TestConnection(); err != nil { + fmt.Printf("❌ Failed to connect to Radarr: %v\n", err) + os.Exit(1) + } + fmt.Println("✅ Successfully connected to Radarr") + } + + // Initialize Sonarr client if enabled + var sonarrClient *sonarr.Client + if cfg.UseSonarr { + sonarrClient = sonarr.NewClient(cfg.SonarrURL, cfg.SonarrAPIKey) + if err := sonarrClient.TestConnection(); err != nil { + fmt.Printf("❌ Failed to connect to Sonarr: %v\n", err) + os.Exit(1) + } + fmt.Println("✅ Successfully connected to Sonarr") + } // Initialize single processor - processor := media.NewProcessor(cfg, plexClient, tmdbClient) + processor, err := media.NewProcessor(cfg, plexClient, tmdbClient, radarrClient, sonarrClient) + if err != nil { + fmt.Printf("❌ Failed to initialize processor: %v\n", err) + os.Exit(1) + } fmt.Println("🏷️ Starting Labelarr with TMDb Integration...") fmt.Printf("📡 Server: %s://%s:%s\n", cfg.Protocol, cfg.PlexServer, cfg.PlexPort) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 
100644 index 0000000..3c8f3c7 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,39 @@ +version: '3.8' + +services: + labelarr: + image: ghcr.io/nullable-eth/labelarr:latest + container_name: labelarr + restart: unless-stopped + volumes: + - ./labelarr-data:/data # Persistent storage for processed items + environment: + # Required - Get from Plex Web (F12 → Network → X-Plex-Token) + - PLEX_TOKEN=your_plex_token_here + # Required - Get from https://www.themoviedb.org/settings/api + - TMDB_READ_ACCESS_TOKEN=your_tmdb_read_access_token + # Required - Your Plex server details + - PLEX_SERVER=localhost + - PLEX_PORT=32400 + - PLEX_REQUIRES_HTTPS=true + + # Process all libraries (recommended for first-time users) + - MOVIE_PROCESS_ALL=true + - TV_PROCESS_ALL=true + + # Optional settings + - PROCESS_TIMER=1h + - UPDATE_FIELD=label # or 'genre' + - VERBOSE_LOGGING=false # Set to true for detailed lookup information + - DATA_DIR=/data # Directory for persistent storage (mounted as volume) + - FORCE_UPDATE=false # Set to true to reprocess all items + + # Radarr integration (optional) + - USE_RADARR=false # Set to true to enable + - RADARR_URL=http://localhost:7878 # Your Radarr URL + - RADARR_API_KEY=your_radarr_api_key # Your Radarr API key + + # Sonarr integration (optional) + - USE_SONARR=false # Set to true to enable + - SONARR_URL=http://localhost:8989 # Your Sonarr URL + - SONARR_API_KEY=your_sonarr_api_key # Your Sonarr API key \ No newline at end of file diff --git a/example/docker-compose.yml b/example/docker-compose.yml index 3b074bb..b6c366e 100644 --- a/example/docker-compose.yml +++ b/example/docker-compose.yml @@ -5,13 +5,33 @@ services: image: ghcr.io/nullable-eth/labelarr:latest container_name: labelarr restart: unless-stopped + volumes: + - ./labelarr-data:/data # Persistent storage for processed items environment: + # Required - PLEX_SERVER=localhost - PLEX_PORT=32400 - PLEX_REQUIRES_HTTPS=true - PLEX_TOKEN=your_plex_token_here - 
TMDB_READ_ACCESS_TOKEN=your_tmdb_read_access_token - - PROCESS_TIMER=1h + + # Library Processing - MOVIE_PROCESS_ALL=true - TV_PROCESS_ALL=true - \ No newline at end of file + + # Optional Settings + - PROCESS_TIMER=1h + - UPDATE_FIELD=label # or 'genre' + - VERBOSE_LOGGING=false + - DATA_DIR=/data + - FORCE_UPDATE=false + + # Radarr Integration (optional) + # - USE_RADARR=true + # - RADARR_URL=http://radarr:7878 + # - RADARR_API_KEY=your_radarr_api_key + + # Sonarr Integration (optional) + # - USE_SONARR=true + # - SONARR_URL=http://sonarr:8989 + # - SONARR_API_KEY=your_sonarr_api_key \ No newline at end of file diff --git a/internal/config/config.go b/internal/config/config.go index e902e18..94e8029 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -21,6 +21,25 @@ type Config struct { RemoveMode string TMDbReadAccessToken string ProcessTimer time.Duration + + // Radarr configuration + RadarrURL string + RadarrAPIKey string + UseRadarr bool + + // Sonarr configuration + SonarrURL string + SonarrAPIKey string + UseSonarr bool + + // Logging configuration + VerboseLogging bool + + // Storage configuration + DataDir string + + // Force update configuration + ForceUpdate bool } // Load loads configuration from environment variables @@ -37,6 +56,25 @@ func Load() *Config { RemoveMode: os.Getenv("REMOVE"), TMDbReadAccessToken: os.Getenv("TMDB_READ_ACCESS_TOKEN"), ProcessTimer: getProcessTimerFromEnv(), + + // Radarr configuration + RadarrURL: os.Getenv("RADARR_URL"), + RadarrAPIKey: os.Getenv("RADARR_API_KEY"), + UseRadarr: getBoolEnvWithDefault("USE_RADARR", false), + + // Sonarr configuration + SonarrURL: os.Getenv("SONARR_URL"), + SonarrAPIKey: os.Getenv("SONARR_API_KEY"), + UseSonarr: getBoolEnvWithDefault("USE_SONARR", false), + + // Logging configuration + VerboseLogging: getBoolEnvWithDefault("VERBOSE_LOGGING", false), + + // Storage configuration + DataDir: getEnvWithDefault("DATA_DIR", "/data"), + + // Force update configuration + 
ForceUpdate: getBoolEnvWithDefault("FORCE_UPDATE", false), } // Set protocol based on HTTPS requirement @@ -84,6 +122,27 @@ func (c *Config) Validate() error { if c.RemoveMode != "" && c.RemoveMode != "lock" && c.RemoveMode != "unlock" { return fmt.Errorf("REMOVE must be 'lock' or 'unlock'") } + + // Validate Radarr configuration if enabled + if c.UseRadarr { + if c.RadarrURL == "" { + return fmt.Errorf("RADARR_URL environment variable is required when USE_RADARR is true") + } + if c.RadarrAPIKey == "" { + return fmt.Errorf("RADARR_API_KEY environment variable is required when USE_RADARR is true") + } + } + + // Validate Sonarr configuration if enabled + if c.UseSonarr { + if c.SonarrURL == "" { + return fmt.Errorf("SONARR_URL environment variable is required when USE_SONARR is true") + } + if c.SonarrAPIKey == "" { + return fmt.Errorf("SONARR_API_KEY environment variable is required when USE_SONARR is true") + } + } + return nil } diff --git a/internal/media/processor.go b/internal/media/processor.go index dca28be..5ebf16c 100644 --- a/internal/media/processor.go +++ b/internal/media/processor.go @@ -8,7 +8,11 @@ import ( "github.com/nullable-eth/labelarr/internal/config" "github.com/nullable-eth/labelarr/internal/plex" + "github.com/nullable-eth/labelarr/internal/radarr" + "github.com/nullable-eth/labelarr/internal/sonarr" + "github.com/nullable-eth/labelarr/internal/storage" "github.com/nullable-eth/labelarr/internal/tmdb" + "github.com/nullable-eth/labelarr/internal/utils" ) // MediaType represents the type of media being processed @@ -19,14 +23,7 @@ const ( MediaTypeTV MediaType = "tv" ) -// ProcessedItem tracks processing state for any media item -type ProcessedItem struct { - RatingKey string - Title string - TMDbID string - LastProcessed time.Time - KeywordsSynced bool -} +// ProcessedItem is now imported from storage package // MediaItem interface for common media operations type MediaItem interface { @@ -41,20 +38,38 @@ type MediaItem interface { // 
Processor handles media processing operations for any media type type Processor struct { - config *config.Config - plexClient *plex.Client - tmdbClient *tmdb.Client - processedItems map[string]*ProcessedItem + config *config.Config + plexClient *plex.Client + tmdbClient *tmdb.Client + radarrClient *radarr.Client + sonarrClient *sonarr.Client + storage *storage.Storage } // NewProcessor creates a new generic media processor -func NewProcessor(cfg *config.Config, plexClient *plex.Client, tmdbClient *tmdb.Client) *Processor { - return &Processor{ - config: cfg, - plexClient: plexClient, - tmdbClient: tmdbClient, - processedItems: make(map[string]*ProcessedItem), +func NewProcessor(cfg *config.Config, plexClient *plex.Client, tmdbClient *tmdb.Client, radarrClient *radarr.Client, sonarrClient *sonarr.Client) (*Processor, error) { + // Initialize persistent storage + stor, err := storage.NewStorage(cfg.DataDir) + if err != nil { + return nil, fmt.Errorf("failed to initialize storage: %w", err) + } + + processor := &Processor{ + config: cfg, + plexClient: plexClient, + tmdbClient: tmdbClient, + radarrClient: radarrClient, + sonarrClient: sonarrClient, + storage: stor, } + + // Log storage initialization + count := stor.Count() + if count > 0 { + fmt.Printf("📁 Loaded %d previously processed items from storage\n", count) + } + + return processor, nil } // ProcessAllItems processes all items in the specified library @@ -85,15 +100,39 @@ func (p *Processor) ProcessAllItems(libraryID string, mediaType MediaType) error totalCount := len(items) fmt.Printf("✅ Found %d %s in library\n", totalCount, displayName) + + if p.config.ForceUpdate { + fmt.Printf("🔄 FORCE UPDATE MODE: All items will be reprocessed regardless of previous processing\n") + } + + if p.config.VerboseLogging { + fmt.Printf("🔎 Starting detailed processing with verbose logging enabled...\n") + } else { + fmt.Printf("⏳ Processing %s... 
(enable VERBOSE_LOGGING=true for detailed lookup information)\n", displayName) + } newItems := 0 updatedItems := 0 skippedItems := 0 skippedAlreadyExist := 0 + + // Progress tracking + processedCount := 0 + lastProgressReport := 0 for _, item := range items { - processed, exists := p.processedItems[item.GetRatingKey()] - if exists && processed.KeywordsSynced { + processedCount++ + + // Show progress for large libraries + if totalCount > 100 { + progress := (processedCount * 100) / totalCount + if progress >= lastProgressReport + 10 { + fmt.Printf("📊 Progress: %d%% (%d/%d %s processed)\n", progress, processedCount, totalCount, displayName) + lastProgressReport = progress + } + } + processed, exists := p.storage.Get(item.GetRatingKey()) + if exists && processed.KeywordsSynced && processed.UpdateField == p.config.UpdateField && !p.config.ForceUpdate { skippedItems++ skippedAlreadyExist++ continue @@ -103,23 +142,39 @@ func (p *Processor) ProcessAllItems(libraryID string, mediaType MediaType) error tmdbID := p.extractTMDbID(item, mediaType) if tmdbID == "" { skippedItems++ + if p.config.VerboseLogging && skippedItems <= 10 { + fmt.Printf(" ⏭️ Skipped %s: %s (%d) - No TMDb ID found\n", strings.TrimSuffix(displayName, "s"), item.GetTitle(), item.GetYear()) + } continue } // Silently fetch keywords and details to check if processing is needed keywords, err := p.getKeywords(tmdbID, mediaType) if err != nil { + if p.config.VerboseLogging { + fmt.Printf(" ❌ Error fetching keywords for TMDb ID %s: %v\n", tmdbID, err) + } skippedItems++ continue } + + if p.config.VerboseLogging { + fmt.Printf(" 📥 Fetched %d keywords from TMDb: %v\n", len(keywords), keywords) + } details, err := p.getItemDetails(item.GetRatingKey(), mediaType) if err != nil { + if p.config.VerboseLogging { + fmt.Printf(" ❌ Error fetching item details: %v\n", err) + } skippedItems++ continue } currentValues := p.extractCurrentValues(details) + if p.config.VerboseLogging { + fmt.Printf(" 📋 Current %ss in Plex: 
%v\n", p.config.UpdateField, currentValues) + } currentValuesMap := make(map[string]bool) for _, val := range currentValues { @@ -127,26 +182,52 @@ func (p *Processor) ProcessAllItems(libraryID string, mediaType MediaType) error } allKeywordsExist := true + var missingKeywords []string for _, keyword := range keywords { if !currentValuesMap[strings.ToLower(keyword)] { allKeywordsExist = false - break + missingKeywords = append(missingKeywords, keyword) } } - if allKeywordsExist { + if allKeywordsExist && !p.config.ForceUpdate { // Silently skip - no verbose output + if p.config.VerboseLogging { + fmt.Printf(" ✨ Already has all keywords, skipping\n") + } skippedItems++ skippedAlreadyExist++ continue } + + if p.config.ForceUpdate && allKeywordsExist { + if p.config.VerboseLogging { + fmt.Printf(" 🔄 Force update enabled - reprocessing item with existing keywords\n") + } + } + + if p.config.VerboseLogging { + fmt.Printf(" 🆕 Missing keywords to add: %v\n", missingKeywords) + } // Only show verbose output for completely new items (never processed before) if !exists { fmt.Printf("\n%s Processing new %s: %s (%d)\n", emoji, strings.TrimSuffix(displayName, "s"), item.GetTitle(), item.GetYear()) - fmt.Printf("🔑 TMDb ID: %s (%s)\n", tmdbID, item.GetTitle()) + + // Show source of TMDb ID + source := p.getTMDbIDSource(item, mediaType, tmdbID) + fmt.Printf("🔑 TMDb ID: %s (source: %s)\n", tmdbID, source) fmt.Printf("🏷️ Found %d TMDb keywords\n", len(keywords)) } + + // Show when we're about to apply labels/genres + if p.config.VerboseLogging || !exists { + fmt.Printf("🔄 Applying %d keywords to %s field...\n", len(keywords), p.config.UpdateField) + if p.config.VerboseLogging { + fmt.Printf(" Current %ss: %v\n", p.config.UpdateField, currentValues) + fmt.Printf(" New keywords to add: %v\n", keywords) + } + } err = p.syncFieldWithKeywords(item.GetRatingKey(), libraryID, currentValues, keywords, mediaType) if err != nil { @@ -157,13 +238,23 @@ func (p *Processor) 
ProcessAllItems(libraryID string, mediaType MediaType) error skippedItems++ continue } + + // Show success message when labels/genres are applied + if p.config.VerboseLogging || !exists { + fmt.Printf("✅ Successfully applied %d keywords to Plex %s field\n", len(keywords), p.config.UpdateField) + } - p.processedItems[item.GetRatingKey()] = &ProcessedItem{ + processedItem := &storage.ProcessedItem{ RatingKey: item.GetRatingKey(), Title: item.GetTitle(), TMDbID: tmdbID, LastProcessed: time.Now(), KeywordsSynced: true, + UpdateField: p.config.UpdateField, + } + + if err := p.storage.Set(processedItem); err != nil { + fmt.Printf("⚠️ Warning: Failed to save processed item to storage: %v\n", err) } if exists { @@ -175,6 +266,11 @@ func (p *Processor) ProcessAllItems(libraryID string, mediaType MediaType) error time.Sleep(500 * time.Millisecond) } + + // Show verbose summary if items were skipped + if p.config.VerboseLogging && skippedItems > 10 { + fmt.Printf(" ... and %d more items skipped\n", skippedItems - 10) + } fmt.Printf("\n📊 Processing Summary:\n") fmt.Printf(" 📈 Total %s in library: %d\n", displayName, totalCount) @@ -359,20 +455,16 @@ func (p *Processor) getKeywords(tmdbID string, mediaType MediaType) ([]string, e // syncFieldWithKeywords synchronizes the configured field with TMDb keywords func (p *Processor) syncFieldWithKeywords(itemID, libraryID string, currentValues []string, keywords []string, mediaType MediaType) error { - mergedValues := append(currentValues, keywords...) 
- - // Remove duplicates while preserving order - seen := make(map[string]bool) - var uniqueValues []string - for _, value := range mergedValues { - lowerValue := strings.ToLower(value) - if !seen[lowerValue] { - seen[lowerValue] = true - uniqueValues = append(uniqueValues, value) - } + // Clean duplicates: remove old unnormalized versions when normalized versions are present + // This helps clean up cases like having both "sci-fi" and "Sci-Fi" + cleanedValues := utils.CleanDuplicateKeywords(currentValues, keywords) + + if p.config.VerboseLogging && len(cleanedValues) != len(currentValues) { + removedCount := len(currentValues) - len(cleanedValues) + len(keywords) + fmt.Printf(" 🧹 Cleaned %d duplicate/unnormalized keywords\n", removedCount) } - return p.updateItemField(itemID, libraryID, uniqueValues, mediaType) + return p.updateItemField(itemID, libraryID, cleanedValues, mediaType) } // toPlexMediaType converts MediaType to the string format expected by plex client @@ -443,59 +535,314 @@ func (p *Processor) extractTMDbID(item MediaItem, mediaType MediaType) string { // extractMovieTMDbID extracts TMDb ID from movie metadata or file paths func (p *Processor) extractMovieTMDbID(item MediaItem) string { + if p.config.VerboseLogging { + fmt.Printf("\n🔍 Starting TMDb ID lookup for movie: %s (%d)\n", item.GetTitle(), item.GetYear()) + fmt.Printf(" 📋 Available Plex GUIDs:\n") + for _, guid := range item.GetGuid() { + fmt.Printf(" - %s\n", guid.ID) + } + } + // First, try to get TMDb ID from Plex metadata for _, guid := range item.GetGuid() { if strings.Contains(guid.ID, "tmdb://") { parts := strings.Split(guid.ID, "//") if len(parts) > 1 { tmdbID := strings.Split(parts[1], "?")[0] + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found TMDb ID in Plex metadata: %s\n", tmdbID) + } return tmdbID } } } - // If not found in metadata, try to extract from file paths + // If Radarr is enabled, try to match via Radarr + if p.config.UseRadarr && p.radarrClient != nil { + if 
p.config.VerboseLogging { + fmt.Printf(" 🎬 Checking Radarr for movie match...\n") + } + + // Try to match by title and year first + if p.config.VerboseLogging { + fmt.Printf(" → Searching by title: \"%s\" year: %d\n", item.GetTitle(), item.GetYear()) + } + movie, err := p.radarrClient.FindMovieMatch(item.GetTitle(), item.GetYear()) + if err == nil && movie != nil { + tmdbID := p.radarrClient.GetTMDbIDFromMovie(movie) + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found match in Radarr: %s (TMDb: %s)\n", movie.Title, tmdbID) + } + return tmdbID + } else if p.config.VerboseLogging { + fmt.Printf(" ❌ No match found by title/year\n") + } + + // Try to match by file path + if p.config.VerboseLogging { + fmt.Printf(" → Searching by file path...\n") + } + for _, mediaItem := range item.GetMedia() { + for _, part := range mediaItem.Part { + if p.config.VerboseLogging { + fmt.Printf(" - Checking: %s\n", part.File) + } + movie, err := p.radarrClient.GetMovieByPath(part.File) + if err == nil && movie != nil { + tmdbID := p.radarrClient.GetTMDbIDFromMovie(movie) + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found match by file path: %s (TMDb: %s)\n", movie.Title, tmdbID) + } + return tmdbID + } + } + } + if p.config.VerboseLogging { + fmt.Printf(" ❌ No match found by file path\n") + } + + // Try to match by IMDb ID if available + for _, guid := range item.GetGuid() { + if strings.Contains(guid.ID, "imdb://") { + imdbID := strings.TrimPrefix(guid.ID, "imdb://") + if p.config.VerboseLogging { + fmt.Printf(" → Searching by IMDb ID: %s\n", imdbID) + } + movie, err := p.radarrClient.GetMovieByIMDbID(imdbID) + if err == nil && movie != nil { + tmdbID := p.radarrClient.GetTMDbIDFromMovie(movie) + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found match by IMDb ID: %s (TMDb: %s)\n", movie.Title, tmdbID) + } + return tmdbID + } else if p.config.VerboseLogging { + fmt.Printf(" ❌ No match found by IMDb ID\n") + } + } + } + } + + // If not found in Radarr or Radarr not enabled, try to 
extract from file paths + if p.config.VerboseLogging { + fmt.Printf(" 📁 Checking file paths for TMDb ID pattern...\n") + } for _, mediaItem := range item.GetMedia() { for _, part := range mediaItem.Part { + if p.config.VerboseLogging { + fmt.Printf(" - Checking: %s\n", part.File) + } if tmdbID := ExtractTMDbIDFromPath(part.File); tmdbID != "" { + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found TMDb ID in file path: %s\n", tmdbID) + } return tmdbID } } } + + if p.config.VerboseLogging { + fmt.Printf(" ❌ No TMDb ID found for movie: %s\n", item.GetTitle()) + } return "" } // extractTVShowTMDbID extracts TMDb ID from TV show metadata or episode file paths func (p *Processor) extractTVShowTMDbID(item MediaItem) string { + if p.config.VerboseLogging { + fmt.Printf("\n🔍 Starting TMDb ID lookup for TV show: %s (%d)\n", item.GetTitle(), item.GetYear()) + fmt.Printf(" 📋 Available Plex GUIDs:\n") + for _, guid := range item.GetGuid() { + fmt.Printf(" - %s\n", guid.ID) + } + } + // First check if we have TMDb GUID in the TV show metadata for _, guid := range item.GetGuid() { if strings.HasPrefix(guid.ID, "tmdb://") { - return strings.TrimPrefix(guid.ID, "tmdb://") + tmdbID := strings.TrimPrefix(guid.ID, "tmdb://") + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found TMDb ID in Plex metadata: %s\n", tmdbID) + } + return tmdbID } } - // If no TMDb GUID found, get episodes and check their file paths + // If Sonarr is enabled, try to match via Sonarr + if p.config.UseSonarr && p.sonarrClient != nil { + if p.config.VerboseLogging { + fmt.Printf(" 📺 Checking Sonarr for series match...\n") + } + + // Try to match by title and year first + if p.config.VerboseLogging { + fmt.Printf(" → Searching by title: \"%s\" year: %d\n", item.GetTitle(), item.GetYear()) + } + series, err := p.sonarrClient.FindSeriesMatch(item.GetTitle(), item.GetYear()) + if err == nil && series != nil { + tmdbID := p.sonarrClient.GetTMDbIDFromSeries(series) + if p.config.VerboseLogging { + fmt.Printf(" ✅ 
Found match in Sonarr: %s (TMDb: %s)\n", series.Title, tmdbID) + } + return tmdbID + } else if p.config.VerboseLogging { + fmt.Printf(" ❌ No match found by title/year\n") + } + + // Try to match by TVDb ID if available + for _, guid := range item.GetGuid() { + if strings.Contains(guid.ID, "tvdb://") { + tvdbIDStr := strings.TrimPrefix(guid.ID, "tvdb://") + // Parse TVDb ID to int + var tvdbID int + if _, err := fmt.Sscanf(tvdbIDStr, "%d", &tvdbID); err == nil { + if p.config.VerboseLogging { + fmt.Printf(" → Searching by TVDb ID: %d\n", tvdbID) + } + series, err := p.sonarrClient.GetSeriesByTVDbID(tvdbID) + if err == nil && series != nil { + tmdbID := p.sonarrClient.GetTMDbIDFromSeries(series) + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found match by TVDb ID: %s (TMDb: %s)\n", series.Title, tmdbID) + } + return tmdbID + } else if p.config.VerboseLogging { + fmt.Printf(" ❌ No match found by TVDb ID\n") + } + } + } + } + + // Try to match by IMDb ID if available + for _, guid := range item.GetGuid() { + if strings.Contains(guid.ID, "imdb://") { + imdbID := strings.TrimPrefix(guid.ID, "imdb://") + if p.config.VerboseLogging { + fmt.Printf(" → Searching by IMDb ID: %s\n", imdbID) + } + series, err := p.sonarrClient.GetSeriesByIMDbID(imdbID) + if err == nil && series != nil { + tmdbID := p.sonarrClient.GetTMDbIDFromSeries(series) + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found match by IMDb ID: %s (TMDb: %s)\n", series.Title, tmdbID) + } + return tmdbID + } else if p.config.VerboseLogging { + fmt.Printf(" ❌ No match found by IMDb ID\n") + } + } + } + + // Try to match by file path from episodes + if p.config.VerboseLogging { + fmt.Printf(" → Searching by episode file paths...\n") + } + episodes, err := p.plexClient.GetTVShowEpisodes(item.GetRatingKey()) + if err == nil { + episodeCount := 0 + for _, episode := range episodes { + for _, mediaItem := range episode.Media { + for _, part := range mediaItem.Part { + episodeCount++ + if episodeCount <= 5 && 
p.config.VerboseLogging { + fmt.Printf(" - Checking: %s\n", part.File) + } + series, err := p.sonarrClient.GetSeriesByPath(part.File) + if err == nil && series != nil { + tmdbID := p.sonarrClient.GetTMDbIDFromSeries(series) + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found match by file path: %s (TMDb: %s)\n", series.Title, tmdbID) + } + return tmdbID + } + } + } + } + if episodeCount > 5 && p.config.VerboseLogging { + fmt.Printf(" ... and %d more episodes\n", episodeCount-5) + } + if p.config.VerboseLogging { + fmt.Printf(" ❌ No match found by file path\n") + } + } else if p.config.VerboseLogging { + fmt.Printf(" ⚠️ Could not fetch episodes: %v\n", err) + } + } + + // If no TMDb GUID found and Sonarr not enabled, get episodes and check their file paths + if p.config.VerboseLogging { + fmt.Printf(" 📁 Checking episode file paths for TMDb ID pattern...\n") + } episodes, err := p.plexClient.GetTVShowEpisodes(item.GetRatingKey()) if err != nil { - fmt.Printf("⚠️ Error fetching episodes for %s: %v\n", item.GetTitle(), err) + if p.config.VerboseLogging { + fmt.Printf(" ⚠️ Error fetching episodes: %v\n", err) + } else { + fmt.Printf("⚠️ Error fetching episodes for %s: %v\n", item.GetTitle(), err) + } return "" } // Check file paths in episodes for TMDb ID - stop at first match + episodeCount := 0 for _, episode := range episodes { for _, mediaItem := range episode.Media { for _, part := range mediaItem.Part { + episodeCount++ + if episodeCount <= 5 && p.config.VerboseLogging { + fmt.Printf(" - Checking: %s\n", part.File) + } if tmdbID := ExtractTMDbIDFromPath(part.File); tmdbID != "" { + if p.config.VerboseLogging { + fmt.Printf(" ✅ Found TMDb ID in file path: %s\n", tmdbID) + } return tmdbID } } } } + + if episodeCount > 5 && p.config.VerboseLogging { + fmt.Printf(" ... 
and %d more episodes\n", episodeCount-5) + } + + if p.config.VerboseLogging { + fmt.Printf(" ❌ No TMDb ID found for TV show: %s\n", item.GetTitle()) + } return "" } +// getTMDbIDSource determines the source of the TMDb ID +func (p *Processor) getTMDbIDSource(item MediaItem, mediaType MediaType, tmdbID string) string { + // Check if it's from Plex metadata + for _, guid := range item.GetGuid() { + if strings.Contains(guid.ID, "tmdb://") { + return "Plex metadata" + } + } + + // Check if it could be from Radarr/Sonarr + if mediaType == MediaTypeMovie && p.config.UseRadarr && p.radarrClient != nil { + // Quick check if movie exists in Radarr with this TMDb ID + movie, err := p.radarrClient.FindMovieMatch(item.GetTitle(), item.GetYear()) + if err == nil && movie != nil && p.radarrClient.GetTMDbIDFromMovie(movie) == tmdbID { + return "Radarr" + } + } + + if mediaType == MediaTypeTV && p.config.UseSonarr && p.sonarrClient != nil { + // Quick check if series exists in Sonarr with this TMDb ID + series, err := p.sonarrClient.FindSeriesMatch(item.GetTitle(), item.GetYear()) + if err == nil && series != nil && p.sonarrClient.GetTMDbIDFromSeries(series) == tmdbID { + return "Sonarr" + } + } + + // Must be from file path + return "file path" +} + // ExtractTMDbIDFromPath extracts TMDb ID from file path using regex func ExtractTMDbIDFromPath(filePath string) string { // Flexible regex pattern to match tmdb followed by digits with separators around the whole pattern diff --git a/internal/plex/client.go b/internal/plex/client.go index b60f7a3..28d039b 100644 --- a/internal/plex/client.go +++ b/internal/plex/client.go @@ -8,6 +8,7 @@ import ( "net/http" "net/url" "strings" + "time" "github.com/nullable-eth/labelarr/internal/config" ) @@ -139,6 +140,9 @@ func (c *Client) GetMovieDetails(ratingKey string) (*Movie, error) { // UpdateMediaField updates a media item's field (labels or genres) with new keywords func (c *Client) UpdateMediaField(mediaID, libraryID string, keywords 
[]string, updateField string, mediaType string) error { + if c.config.VerboseLogging { + fmt.Printf(" 🌐 Making Plex API call to update %s field with %d keywords\n", updateField, len(keywords)) + } return c.updateMediaField(mediaID, libraryID, keywords, updateField, c.getMediaTypeForLibraryType(mediaType)) } @@ -255,6 +259,8 @@ func (c *Client) GetTVShowEpisodes(ratingKey string) ([]Episode, error) { // updateMediaField is a generic function to update media fields (movies: type=1, TV shows: type=2) func (c *Client) updateMediaField(mediaID, libraryID string, keywords []string, updateField string, mediaType int) error { + startTime := time.Now() + // Build the base URL baseURL := c.buildURL(fmt.Sprintf("/library/sections/%s/all", libraryID)) @@ -299,6 +305,11 @@ func (c *Client) updateMediaField(mediaID, libraryID string, keywords []string, body, _ := io.ReadAll(resp.Body) return fmt.Errorf("plex API returned status %d when updating media field - Response: %s", resp.StatusCode, string(body)) } + + if c.config.VerboseLogging { + duration := time.Since(startTime) + fmt.Printf(" ⏱️ Plex API call completed in %v\n", duration) + } return nil } diff --git a/internal/radarr/client.go b/internal/radarr/client.go new file mode 100644 index 0000000..e4a07ab --- /dev/null +++ b/internal/radarr/client.go @@ -0,0 +1,241 @@ +package radarr + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + "time" +) + +// Client represents a Radarr API client +type Client struct { + baseURL string + apiKey string + httpClient *http.Client +} + +// NewClient creates a new Radarr API client +func NewClient(baseURL, apiKey string) *Client { + // Ensure baseURL doesn't have trailing slash + baseURL = strings.TrimRight(baseURL, "/") + + return &Client{ + baseURL: baseURL, + apiKey: apiKey, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +// makeRequest performs an API request to Radarr +func (c *Client) makeRequest(method, endpoint string, 
params url.Values) (*http.Response, error) { + fullURL := fmt.Sprintf("%s%s", c.baseURL, endpoint) + + if params != nil && len(params) > 0 { + fullURL = fmt.Sprintf("%s?%s", fullURL, params.Encode()) + } + + req, err := http.NewRequest(method, fullURL, nil) + if err != nil { + return nil, fmt.Errorf("error creating request: %w", err) + } + + req.Header.Set("X-Api-Key", c.apiKey) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("error making request: %w", err) + } + + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + return nil, fmt.Errorf("radarr API returned status %d", resp.StatusCode) + } + + return resp, nil +} + +// GetAllMovies retrieves all movies from Radarr +func (c *Client) GetAllMovies() ([]Movie, error) { + resp, err := c.makeRequest("GET", "/api/v3/movie", nil) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var movies []Movie + if err := json.NewDecoder(resp.Body).Decode(&movies); err != nil { + return nil, fmt.Errorf("error decoding movies: %w", err) + } + + return movies, nil +} + +// GetMovieByTMDbID retrieves a movie by its TMDb ID +func (c *Client) GetMovieByTMDbID(tmdbID int) (*Movie, error) { + movies, err := c.GetAllMovies() + if err != nil { + return nil, err + } + + for _, movie := range movies { + if movie.TMDbID == tmdbID { + return &movie, nil + } + } + + return nil, fmt.Errorf("movie with TMDb ID %d not found", tmdbID) +} + +// SearchMovieByTitle searches for movies by title +func (c *Client) SearchMovieByTitle(title string) ([]Movie, error) { + // First try to get all movies and filter locally + // This is more reliable than using Radarr's search endpoint + allMovies, err := c.GetAllMovies() + if err != nil { + return nil, err + } + + var matches []Movie + titleLower := strings.ToLower(title) + + for _, movie := range allMovies { + if 
strings.Contains(strings.ToLower(movie.Title), titleLower) || + strings.Contains(strings.ToLower(movie.OriginalTitle), titleLower) { + matches = append(matches, movie) + continue + } + + // Check alternate titles + for _, altTitle := range movie.AlternateTitles { + if strings.Contains(strings.ToLower(altTitle.Title), titleLower) { + matches = append(matches, movie) + break + } + } + } + + return matches, nil +} + +// FindMovieMatch attempts to find the best match for a movie by title and year +func (c *Client) FindMovieMatch(title string, year int) (*Movie, error) { + movies, err := c.SearchMovieByTitle(title) + if err != nil { + return nil, err + } + + // First try exact title and year match + titleLower := strings.ToLower(title) + for _, movie := range movies { + if strings.ToLower(movie.Title) == titleLower && movie.Year == year { + return &movie, nil + } + } + + // Then try year match with similar title + for _, movie := range movies { + if movie.Year == year { + return &movie, nil + } + } + + // If still no match, try within 1 year range + for _, movie := range movies { + if movie.Year >= year-1 && movie.Year <= year+1 { + return &movie, nil + } + } + + // Return first match if any found + if len(movies) > 0 { + return &movies[0], nil + } + + return nil, fmt.Errorf("no movie match found for: %s (%d)", title, year) +} + +// GetSystemStatus retrieves Radarr system status (useful for testing connection) +func (c *Client) GetSystemStatus() (*SystemStatus, error) { + resp, err := c.makeRequest("GET", "/api/v3/system/status", nil) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var status SystemStatus + if err := json.NewDecoder(resp.Body).Decode(&status); err != nil { + return nil, fmt.Errorf("error decoding system status: %w", err) + } + + return &status, nil +} + +// TestConnection tests the connection to Radarr +func (c *Client) TestConnection() error { + _, err := c.GetSystemStatus() + return err +} + +// GetMovieByIMDbID retrieves a movie 
by its IMDb ID +func (c *Client) GetMovieByIMDbID(imdbID string) (*Movie, error) { + // Normalize IMDb ID format + if !strings.HasPrefix(imdbID, "tt") { + imdbID = "tt" + imdbID + } + + movies, err := c.GetAllMovies() + if err != nil { + return nil, err + } + + for _, movie := range movies { + if movie.IMDbID == imdbID { + return &movie, nil + } + } + + return nil, fmt.Errorf("movie with IMDb ID %s not found", imdbID) +} + +// GetMovieByPath attempts to find a movie by its file path +func (c *Client) GetMovieByPath(filePath string) (*Movie, error) { + movies, err := c.GetAllMovies() + if err != nil { + return nil, err + } + + // Normalize the file path for comparison + filePathLower := strings.ToLower(filePath) + + for _, movie := range movies { + // Check if the file path is within the movie's folder + if movie.Path != "" && strings.Contains(filePathLower, strings.ToLower(movie.Path)) { + return &movie, nil + } + + // Also check against the movie file path if available + if movie.HasFile && movie.MovieFile.Path != "" { + if strings.EqualFold(movie.MovieFile.Path, filePath) || + strings.Contains(filePathLower, strings.ToLower(movie.MovieFile.Path)) { + return &movie, nil + } + } + } + + return nil, fmt.Errorf("movie not found for path: %s", filePath) +} + +// GetTMDbIDFromMovie extracts the TMDb ID from a Radarr movie +func (c *Client) GetTMDbIDFromMovie(movie *Movie) string { + if movie.TMDbID > 0 { + return strconv.Itoa(movie.TMDbID) + } + return "" +} \ No newline at end of file diff --git a/internal/radarr/types.go b/internal/radarr/types.go new file mode 100644 index 0000000..227e3a8 --- /dev/null +++ b/internal/radarr/types.go @@ -0,0 +1,71 @@ +package radarr + +// Movie represents a movie in Radarr +type Movie struct { + ID int `json:"id"` + Title string `json:"title"` + OriginalTitle string `json:"originalTitle,omitempty"` + AlternateTitles []AlternateTitle `json:"alternateTitles,omitempty"` + Year int `json:"year"` + TMDbID int `json:"tmdbId"` + IMDbID 
string `json:"imdbId,omitempty"` + Images []Image `json:"images,omitempty"` + Status string `json:"status"` + Path string `json:"path"` + FolderName string `json:"folderName,omitempty"` + MovieFile MovieFile `json:"movieFile,omitempty"` + HasFile bool `json:"hasFile"` + Monitored bool `json:"monitored"` + MinimumAvailability string `json:"minimumAvailability"` + IsAvailable bool `json:"isAvailable"` + ProfileID int `json:"profileId"` + Runtime int `json:"runtime"` + CleanTitle string `json:"cleanTitle"` + TitleSlug string `json:"titleSlug"` +} + +// AlternateTitle represents alternate titles for a movie +type AlternateTitle struct { + SourceType string `json:"sourceType"` + MovieID int `json:"movieId"` + Title string `json:"title"` + CleanTitle string `json:"cleanTitle"` +} + +// Image represents movie artwork +type Image struct { + CoverType string `json:"coverType"` + URL string `json:"url"` + RemoteURL string `json:"remoteUrl,omitempty"` +} + +// MovieFile represents the actual file for a movie +type MovieFile struct { + ID int `json:"id"` + MovieID int `json:"movieId"` + RelativePath string `json:"relativePath"` + Path string `json:"path"` + Size int64 `json:"size"` + DateAdded string `json:"dateAdded"` +} + +// SearchResult represents a movie search result +type SearchResult struct { + Movie +} + +// SystemStatus represents Radarr system status +type SystemStatus struct { + Version string `json:"version"` + BuildTime string `json:"buildTime"` + IsDebug bool `json:"isDebug"` + IsProduction bool `json:"isProduction"` + IsAdmin bool `json:"isAdmin"` + IsUserInteractive bool `json:"isUserInteractive"` + StartupPath string `json:"startupPath"` + AppData string `json:"appData"` + OsName string `json:"osName"` + OsVersion string `json:"osVersion"` + Branch string `json:"branch"` + Authentication string `json:"authentication"` +} \ No newline at end of file diff --git a/internal/sonarr/client.go b/internal/sonarr/client.go new file mode 100644 index 0000000..89c5cc2 
--- /dev/null +++ b/internal/sonarr/client.go @@ -0,0 +1,268 @@ +package sonarr + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + "time" +) + +// Client represents a Sonarr API client +type Client struct { + baseURL string + apiKey string + httpClient *http.Client +} + +// NewClient creates a new Sonarr API client +func NewClient(baseURL, apiKey string) *Client { + // Ensure baseURL doesn't have trailing slash + baseURL = strings.TrimRight(baseURL, "/") + + return &Client{ + baseURL: baseURL, + apiKey: apiKey, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +// makeRequest performs an API request to Sonarr +func (c *Client) makeRequest(method, endpoint string, params url.Values) (*http.Response, error) { + fullURL := fmt.Sprintf("%s%s", c.baseURL, endpoint) + + if params != nil && len(params) > 0 { + fullURL = fmt.Sprintf("%s?%s", fullURL, params.Encode()) + } + + req, err := http.NewRequest(method, fullURL, nil) + if err != nil { + return nil, fmt.Errorf("error creating request: %w", err) + } + + req.Header.Set("X-Api-Key", c.apiKey) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("error making request: %w", err) + } + + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + return nil, fmt.Errorf("sonarr API returned status %d", resp.StatusCode) + } + + return resp, nil +} + +// GetAllSeries retrieves all TV series from Sonarr +func (c *Client) GetAllSeries() ([]Series, error) { + resp, err := c.makeRequest("GET", "/api/v3/series", nil) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var series []Series + if err := json.NewDecoder(resp.Body).Decode(&series); err != nil { + return nil, fmt.Errorf("error decoding series: %w", err) + } + + return series, nil +} + +// GetSeriesByTMDbID retrieves a series by its TMDb ID +func (c *Client) 
GetSeriesByTMDbID(tmdbID int) (*Series, error) { + series, err := c.GetAllSeries() + if err != nil { + return nil, err + } + + for _, s := range series { + if s.TMDBID == tmdbID { + return &s, nil + } + } + + return nil, fmt.Errorf("series with TMDb ID %d not found", tmdbID) +} + +// GetSeriesByTVDbID retrieves a series by its TVDb ID +func (c *Client) GetSeriesByTVDbID(tvdbID int) (*Series, error) { + series, err := c.GetAllSeries() + if err != nil { + return nil, err + } + + for _, s := range series { + if s.TVDbID == tvdbID { + return &s, nil + } + } + + return nil, fmt.Errorf("series with TVDb ID %d not found", tvdbID) +} + +// SearchSeriesByTitle searches for series by title +func (c *Client) SearchSeriesByTitle(title string) ([]Series, error) { + // First try to get all series and filter locally + // This is more reliable than using Sonarr's search endpoint + allSeries, err := c.GetAllSeries() + if err != nil { + return nil, err + } + + var matches []Series + titleLower := strings.ToLower(title) + + for _, series := range allSeries { + if strings.Contains(strings.ToLower(series.Title), titleLower) || + strings.Contains(strings.ToLower(series.SortTitle), titleLower) { + matches = append(matches, series) + continue + } + + // Check alternate titles + for _, altTitle := range series.AlternateTitles { + if strings.Contains(strings.ToLower(altTitle.Title), titleLower) { + matches = append(matches, series) + break + } + } + } + + return matches, nil +} + +// FindSeriesMatch attempts to find the best match for a series by title and year +func (c *Client) FindSeriesMatch(title string, year int) (*Series, error) { + series, err := c.SearchSeriesByTitle(title) + if err != nil { + return nil, err + } + + // First try exact title and year match + titleLower := strings.ToLower(title) + for _, s := range series { + if strings.ToLower(s.Title) == titleLower && s.Year == year { + return &s, nil + } + } + + // Then try year match with similar title + for _, s := range series 
{ + if s.Year == year { + return &s, nil + } + } + + // If still no match, try within 1 year range + for _, s := range series { + if s.Year >= year-1 && s.Year <= year+1 { + return &s, nil + } + } + + // Return first match if any found + if len(series) > 0 { + return &series[0], nil + } + + return nil, fmt.Errorf("no series match found for: %s (%d)", title, year) +} + +// GetSystemStatus retrieves Sonarr system status (useful for testing connection) +func (c *Client) GetSystemStatus() (*SystemStatus, error) { + resp, err := c.makeRequest("GET", "/api/v3/system/status", nil) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var status SystemStatus + if err := json.NewDecoder(resp.Body).Decode(&status); err != nil { + return nil, fmt.Errorf("error decoding system status: %w", err) + } + + return &status, nil +} + +// TestConnection tests the connection to Sonarr +func (c *Client) TestConnection() error { + _, err := c.GetSystemStatus() + return err +} + +// GetSeriesByIMDbID retrieves a series by its IMDb ID +func (c *Client) GetSeriesByIMDbID(imdbID string) (*Series, error) { + // Normalize IMDb ID format + if !strings.HasPrefix(imdbID, "tt") { + imdbID = "tt" + imdbID + } + + series, err := c.GetAllSeries() + if err != nil { + return nil, err + } + + for _, s := range series { + if s.IMDBID == imdbID { + return &s, nil + } + } + + return nil, fmt.Errorf("series with IMDb ID %s not found", imdbID) +} + +// GetSeriesByPath attempts to find a series by its file path +func (c *Client) GetSeriesByPath(filePath string) (*Series, error) { + series, err := c.GetAllSeries() + if err != nil { + return nil, err + } + + // Normalize the file path for comparison + filePathLower := strings.ToLower(filePath) + + for _, s := range series { + // Check if the file path is within the series' folder + if s.Path != "" && strings.Contains(filePathLower, strings.ToLower(s.Path)) { + return &s, nil + } + } + + return nil, fmt.Errorf("series not found for path: %s", 
filePath) +} + +// GetTMDbIDFromSeries extracts the TMDb ID from a Sonarr series +func (c *Client) GetTMDbIDFromSeries(series *Series) string { + if series.TMDBID > 0 { + return strconv.Itoa(series.TMDBID) + } + return "" +} + +// GetEpisodesBySeries gets all episodes for a series +func (c *Client) GetEpisodesBySeries(seriesID int) ([]Episode, error) { + params := url.Values{} + params.Set("seriesId", strconv.Itoa(seriesID)) + + resp, err := c.makeRequest("GET", "/api/v3/episode", params) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var episodes []Episode + if err := json.NewDecoder(resp.Body).Decode(&episodes); err != nil { + return nil, fmt.Errorf("error decoding episodes: %w", err) + } + + return episodes, nil +} \ No newline at end of file diff --git a/internal/sonarr/types.go b/internal/sonarr/types.go new file mode 100644 index 0000000..12485c2 --- /dev/null +++ b/internal/sonarr/types.go @@ -0,0 +1,90 @@ +package sonarr + +// Series represents a TV series in Sonarr +type Series struct { + ID int `json:"id"` + Title string `json:"title"` + AlternateTitles []AlternateTitle `json:"alternateTitles,omitempty"` + SortTitle string `json:"sortTitle"` + Year int `json:"year"` + TVDbID int `json:"tvdbId"` + TVRageID int `json:"tvRageId,omitempty"` + TVMazeID int `json:"tvMazeId,omitempty"` + TMDBID int `json:"tmdbId,omitempty"` + IMDBID string `json:"imdbId,omitempty"` + Status string `json:"status"` + Path string `json:"path"` + Images []Image `json:"images,omitempty"` + Seasons []Season `json:"seasons,omitempty"` + QualityProfileID int `json:"qualityProfileId"` + SeasonFolder bool `json:"seasonFolder"` + Monitored bool `json:"monitored"` + Runtime int `json:"runtime"` + SeriesType string `json:"seriesType"` + CleanTitle string `json:"cleanTitle"` + TitleSlug string `json:"titleSlug"` + FirstAired string `json:"firstAired,omitempty"` + Added string `json:"added"` +} + +// AlternateTitle represents alternate titles for a series +type 
AlternateTitle struct { + Title string `json:"title"` + SeasonNumber int `json:"seasonNumber,omitempty"` +} + +// Image represents series artwork +type Image struct { + CoverType string `json:"coverType"` + URL string `json:"url"` + RemoteURL string `json:"remoteUrl,omitempty"` +} + +// Season represents a season of a TV series +type Season struct { + SeasonNumber int `json:"seasonNumber"` + Monitored bool `json:"monitored"` +} + +// Episode represents an episode of a TV series +type Episode struct { + ID int `json:"id"` + SeriesID int `json:"seriesId"` + EpisodeFileID int `json:"episodeFileId"` + SeasonNumber int `json:"seasonNumber"` + EpisodeNumber int `json:"episodeNumber"` + Title string `json:"title"` + AirDate string `json:"airDate,omitempty"` + AirDateUTC string `json:"airDateUtc,omitempty"` + HasFile bool `json:"hasFile"` + Monitored bool `json:"monitored"` + AbsoluteEpisodeNumber int `json:"absoluteEpisodeNumber,omitempty"` + EpisodeFile *EpisodeFile `json:"episodeFile,omitempty"` +} + +// EpisodeFile represents the actual file for an episode +type EpisodeFile struct { + ID int `json:"id"` + SeriesID int `json:"seriesId"` + SeasonNumber int `json:"seasonNumber"` + RelativePath string `json:"relativePath"` + Path string `json:"path"` + Size int64 `json:"size"` + DateAdded string `json:"dateAdded"` +} + +// SystemStatus represents Sonarr system status +type SystemStatus struct { + Version string `json:"version"` + BuildTime string `json:"buildTime"` + IsDebug bool `json:"isDebug"` + IsProduction bool `json:"isProduction"` + IsAdmin bool `json:"isAdmin"` + IsUserInteractive bool `json:"isUserInteractive"` + StartupPath string `json:"startupPath"` + AppData string `json:"appData"` + OsName string `json:"osName"` + OsVersion string `json:"osVersion"` + Branch string `json:"branch"` + Authentication string `json:"authentication"` +} \ No newline at end of file diff --git a/internal/storage/storage.go b/internal/storage/storage.go new file mode 100644 index 
// ProcessedItem represents an item that has been processed.
type ProcessedItem struct {
	RatingKey     string    `json:"ratingKey"`
	Title         string    `json:"title"`
	TMDbID        string    `json:"tmdbId"`
	LastProcessed time.Time `json:"lastProcessed"`
	KeywordsSynced bool     `json:"keywordsSynced"`
	UpdateField   string    `json:"updateField"`
}

// Storage handles persistent storage of processed items, backed by a
// JSON file on disk. All exported methods are safe for concurrent use.
type Storage struct {
	filePath string
	data     map[string]*ProcessedItem
	mutex    sync.RWMutex
}

// NewStorage creates a new storage instance rooted at dataDir.
// The directory is created if missing and previously persisted items
// are loaded from processed_items.json. A missing file is not an error.
func NewStorage(dataDir string) (*Storage, error) {
	if err := os.MkdirAll(dataDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create data directory: %w", err)
	}

	s := &Storage{
		filePath: filepath.Join(dataDir, "processed_items.json"),
		data:     make(map[string]*ProcessedItem),
	}

	// Load existing data; only a missing file is tolerated.
	if err := s.load(); err != nil && !os.IsNotExist(err) {
		return nil, fmt.Errorf("failed to load existing data: %w", err)
	}

	return s, nil
}

// load reads data from the JSON file. A zero-length file (e.g. created
// by an interrupted run) is treated as an empty store rather than a
// JSON parse error.
func (s *Storage) load() error {
	raw, err := os.ReadFile(s.filePath)
	if err != nil {
		return err
	}
	if len(raw) == 0 {
		return nil
	}
	return json.Unmarshal(raw, &s.data)
}

// save writes data to the JSON file.
// It writes to a temp file first, then renames (atomic on POSIX), so a
// crash mid-write never leaves a truncated store behind.
func (s *Storage) save() error {
	data, err := json.MarshalIndent(s.data, "", "  ")
	if err != nil {
		return err
	}

	tempFile := s.filePath + ".tmp"
	if err := os.WriteFile(tempFile, data, 0644); err != nil {
		return err
	}

	return os.Rename(tempFile, s.filePath)
}

// Get retrieves a processed item by rating key.
// NOTE(review): the returned pointer is the live stored item — callers
// must not mutate it without going through Set.
func (s *Storage) Get(ratingKey string) (*ProcessedItem, bool) {
	s.mutex.RLock()
	defer s.mutex.RUnlock()

	item, exists := s.data[ratingKey]
	return item, exists
}

// Set stores a processed item and persists the store to disk.
func (s *Storage) Set(item *ProcessedItem) error {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	s.data[item.RatingKey] = item

	return s.save()
}

// GetAll returns a snapshot of all processed items. Both the map and
// the items themselves are copied, so callers cannot race with (or
// observe) concurrent writers through the returned values.
func (s *Storage) GetAll() map[string]*ProcessedItem {
	s.mutex.RLock()
	defer s.mutex.RUnlock()

	result := make(map[string]*ProcessedItem, len(s.data))
	for k, v := range s.data {
		item := *v // copy the value, not just the pointer
		result[k] = &item
	}
	return result
}

// Count returns the number of processed items.
func (s *Storage) Count() int {
	s.mutex.RLock()
	defer s.mutex.RUnlock()

	return len(s.data)
}

// Cleanup removes processed items older than maxAge. The store is only
// rewritten to disk when at least one item was actually removed.
func (s *Storage) Cleanup(maxAge time.Duration) error {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	cutoff := time.Now().Add(-maxAge)

	removed := false
	for key, item := range s.data {
		if item.LastProcessed.Before(cutoff) {
			delete(s.data, key)
			removed = true
		}
	}

	if !removed {
		return nil // nothing changed; skip the disk write
	}
	return s.save()
}
Response: %s", resp.StatusCode, tmdbID, string(body)) } body, err := io.ReadAll(resp.Body) @@ -66,7 +71,19 @@ func (c *Client) GetMovieKeywords(tmdbID string) ([]string, error) { keywords[i] = keyword.Name } - return keywords, nil + // Normalize keywords for proper capitalization and spelling + normalizedKeywords := utils.NormalizeKeywords(keywords) + + // Show normalization in verbose mode + if c.config.VerboseLogging { + for i, original := range keywords { + if i < len(normalizedKeywords) && original != normalizedKeywords[i] { + fmt.Printf(" 📝 Normalized: \"%s\" → \"%s\"\n", original, normalizedKeywords[i]) + } + } + } + + return normalizedKeywords, nil } // GetTVShowKeywords fetches keywords for a TV show from TMDb @@ -93,7 +110,11 @@ func (c *Client) GetTVShowKeywords(tmdbID string) ([]string, error) { } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("tmdb API returned status %d for TV show %s", resp.StatusCode, tmdbID) + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode == http.StatusUnauthorized { + return nil, fmt.Errorf("tmdb API authentication failed (status 401) - check your TMDB_READ_ACCESS_TOKEN. Response: %s", string(body)) + } + return nil, fmt.Errorf("tmdb API returned status %d for TV show %s. 
Response: %s", resp.StatusCode, tmdbID, string(body)) } body, err := io.ReadAll(resp.Body) @@ -111,5 +132,49 @@ func (c *Client) GetTVShowKeywords(tmdbID string) ([]string, error) { keywords[i] = keyword.Name } - return keywords, nil + // Normalize keywords for proper capitalization and spelling + normalizedKeywords := utils.NormalizeKeywords(keywords) + + // Show normalization in verbose mode + if c.config.VerboseLogging { + for i, original := range keywords { + if i < len(normalizedKeywords) && original != normalizedKeywords[i] { + fmt.Printf(" 📝 Normalized: \"%s\" → \"%s\"\n", original, normalizedKeywords[i]) + } + } + } + + return normalizedKeywords, nil +} + +// TestConnection tests the TMDb API connection +func (c *Client) TestConnection() error { + // Test with a known movie ID (The Godfather) + testURL := "https://api.themoviedb.org/3/movie/238/keywords" + + req, err := http.NewRequest("GET", testURL, nil) + if err != nil { + return fmt.Errorf("failed to create test request: %w", err) + } + + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.config.TMDbReadAccessToken)) + req.Header.Set("Accept", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to connect to TMDb API: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode == http.StatusUnauthorized { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("TMDb API authentication failed - invalid TMDB_READ_ACCESS_TOKEN. Response: %s", string(body)) + } + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("TMDb API test failed with status %d. 
// Common acronyms and abbreviations that should remain uppercase
var commonAcronyms = map[string]bool{
	"usa":   true,
	"uk":    true,
	"us":    true,
	"u.s.":  true,
	"fbi":   true,
	"cia":   true,
	"nsa":   true,
	"dea":   true,
	"atf":   true,
	"ice":   true,
	"epa":   true,
	"irs":   true,
	"sec":   true,
	"nasa":  true,
	"nypd":  true,
	"lapd":  true,
	"swat":  true,
	"dc":    true,
	"nyc":   true,
	"la":    true,
	"sf":    true,
	"ai":    true,
	"a.i.":  true,
	"cgi":   true,
	"vr":    true,
	"ar":    true,
	"3d":    true,
	"4k":    true,
	"hd":    true,
	"uhd":   true,
	"lgbt":  true,
	"lgbtq": true,
	"wwi":   true,
	"wwii":  true,
	"ufo":   true,
	"tv":    true,
	"mtv":   true,
	"vhs":   true,
	"dvd":   true,
	"cd":    true,
	"dj":    true,
	"mc":    true,
	"bc":    true,
	"ad":    true,
	"bbc":   true,
	"cbs":   true,
	"nbc":   true,
	"abc":   true,
	"cnn":   true,
	"suv":   true,
	"rv":    true,
	"phd":   true,
	"md":    true,
	"ceo":   true,
	"cto":   true,
	"cfo":   true,
	"hr":    true,
	"it":    true,
	"pr":    true,
	"pc":    true,
	"mac":   true,
	"ios":   true,
	"os":    true,
}

// Words that should remain lowercase (articles, prepositions, conjunctions)
var lowercaseWords = map[string]bool{
	"a":      true,
	"an":     true,
	"and":    true,
	"as":     true,
	"at":     true,
	"but":    true,
	"by":     true,
	"for":    true,
	"from":   true,
	"in":     true,
	"into":   true,
	"nor":    true,
	"of":     true,
	"on":     true,
	"or":     true,
	"over":   true,
	"the":    true,
	"to":     true,
	"up":     true,
	"with":   true,
	"within": true,
}

// Critical replacements for well-known abbreviations and misspellings
var criticalReplacements = map[string]string{
	"sci-fi":               "Sci-Fi",
	"scifi":                "Sci-Fi",
	"sci fi":               "Sci-Fi",
	"romcom":               "Romantic Comedy",
	"rom-com":              "Romantic Comedy",
	"bio-pic":              "Biopic",
	"bio pic":              "Biopic",
	"neo-noir":             "Neo-Noir",
	"neo noir":             "Neo-Noir",
	"duringcreditsstinger": "During Credits Stinger",
	"aftercreditsstinger":  "After Credits Stinger",
	"midcreditsstinger":    "Mid Credits Stinger",
}

// Smart pattern matchers for dynamic normalization.
// NOTE: the previously declared hyphenatedPattern was never used; hyphen
// handling now lives in titleCase itself, so the pattern was removed.
var (
	// Match decade patterns like "1940s", "1990s"
	decadePattern = regexp.MustCompile(`^\d{4}s$`)

	// Match whole "X vs Y" phrases. Anchored so words outside the match
	// are never silently discarded (the old unanchored `\b(\w+)\s+vs\s+(\w+)\b`
	// dropped any surrounding text).
	versusPattern = regexp.MustCompile(`^(.+?)\s+vs\s+(.+)$`)

	// Match "based on X" patterns
	basedOnPattern = regexp.MustCompile(`^based on (.+)$`)

	// Match relationship patterns like "father daughter", "mother son"
	relationshipPattern = regexp.MustCompile(`^(father|mother|parent|brother|sister|son|daughter)\s+(father|mother|parent|brother|sister|son|daughter)(?:\s+relationship)?$`)

	// Match city/state patterns like "san francisco, california"
	cityStatePattern = regexp.MustCompile(`^([^,]+),\s*([^,]+)$`)

	// Match ethnicity/nationality + descriptive word patterns
	ethnicityPattern = regexp.MustCompile(`^(african|asian|european|american|british|french|german|italian|spanish|chinese|japanese|korean|indian|mexican|latin|hispanic)\s+(american|lead|character|protagonist|antagonist|actor|actress)$`)

	// Match patterns with acronyms in parentheses like "central intelligence agency (cia)"
	acronymInParensPattern = regexp.MustCompile(`^(.+)\s+\(([a-z.]+)\)$`)

	// Match potential organization/agency patterns like "dea agent", "fbi director"
	agencyPattern = regexp.MustCompile(`^([a-z]{2,5})\s+(agent|director|officer|investigator|detective|operative|analyst|chief|deputy|special agent)$`)

	// Match century patterns like "5th century bc", "10th century"
	centuryPattern = regexp.MustCompile(`^(\d+)(st|nd|rd|th)\s+century(\s+[a-z]+)?$`)
)

// NormalizeKeyword normalizes a single keyword with proper capitalization.
// Resolution order: fixed replacements ("sci-fi" -> "Sci-Fi"), structural
// patterns (decades, "X vs Y", agencies, centuries, ...), whole-keyword
// acronyms ("fbi" -> "FBI"), then generic title casing.
func NormalizeKeyword(keyword string) string {
	keyword = strings.TrimSpace(keyword)
	if keyword == "" {
		return keyword
	}

	// All matching is done against the lowercase form.
	lowerKeyword := strings.ToLower(keyword)

	// 1. Check critical replacements first (known abbreviations)
	if replacement, exists := criticalReplacements[lowerKeyword]; exists {
		return replacement
	}

	// 2. Pattern-based normalization
	if normalized := applyPatternNormalization(lowerKeyword); normalized != "" {
		return normalized
	}

	// 3. Check if it's a known acronym (return uppercased if so)
	if commonAcronyms[lowerKeyword] {
		return strings.ToUpper(keyword)
	}

	// 4. Apply intelligent title casing
	return applyTitleCase(keyword)
}

// applyPatternNormalization applies pattern-based rules to the lowercase
// keyword. Returns "" when no pattern matched, letting the caller fall
// through to acronym/title-case handling.
func applyPatternNormalization(keyword string) string {
	// Decades (1940s, 1990s, etc.) are already canonical.
	if decadePattern.MatchString(keyword) {
		return keyword
	}

	// City, State patterns (san francisco, california)
	if matches := cityStatePattern.FindStringSubmatch(keyword); matches != nil {
		return applyTitleCase(matches[1]) + ", " + applyTitleCase(matches[2])
	}

	// "X vs Y" patterns
	if matches := versusPattern.FindStringSubmatch(keyword); matches != nil {
		return applyTitleCase(matches[1]) + " vs " + applyTitleCase(matches[2])
	}

	// "based on X" patterns
	if matches := basedOnPattern.FindStringSubmatch(keyword); matches != nil {
		return "Based on " + applyTitleCase(matches[1])
	}

	// Relationship patterns (father daughter relationship)
	if relationshipPattern.MatchString(keyword) {
		parts := strings.Fields(keyword)
		normalized := make([]string, len(parts))
		for i, part := range parts {
			normalized[i] = titleCase(part)
		}
		result := strings.Join(normalized, " ")
		// Append "Relationship" when the keyword omitted it.
		if !strings.HasSuffix(strings.ToLower(result), "relationship") {
			result += " Relationship"
		}
		return result
	}

	// Ethnicity + descriptor patterns (african american lead)
	if ethnicityPattern.MatchString(keyword) {
		parts := strings.Fields(keyword)
		normalized := make([]string, len(parts))
		for i, part := range parts {
			normalized[i] = titleCase(part)
		}
		return strings.Join(normalized, " ")
	}

	// Acronym in parentheses patterns (central intelligence agency (cia))
	if matches := acronymInParensPattern.FindStringSubmatch(keyword); matches != nil {
		return applyTitleCase(matches[1]) + " (" + strings.ToUpper(matches[2]) + ")"
	}

	// Agency/organization patterns (dea agent, fbi director)
	if matches := agencyPattern.FindStringSubmatch(keyword); matches != nil {
		agency, role := matches[1], matches[2]
		// Known acronym, or short enough (2-4 letters) to look like one.
		if commonAcronyms[agency] || len(agency) <= 4 {
			return strings.ToUpper(agency) + " " + titleCase(role)
		}
		return titleCase(agency) + " " + titleCase(role)
	}

	// Century patterns (5th century bc, 10th century)
	if matches := centuryPattern.FindStringSubmatch(keyword); matches != nil {
		century := matches[1] + matches[2] + " Century"
		if matches[3] != "" {
			// Handle BC/AD or other suffixes
			suffix := strings.TrimSpace(matches[3])
			if commonAcronyms[suffix] || len(suffix) <= 2 {
				century += " " + strings.ToUpper(suffix)
			} else {
				century += " " + titleCase(suffix)
			}
		}
		return century
	}

	return "" // No pattern matched
}

// applyTitleCase applies intelligent title casing to a phrase: each word
// is capitalized except known articles/prepositions (never the first
// word), and known acronyms are uppercased.
func applyTitleCase(phrase string) string {
	words := strings.Fields(phrase)
	if len(words) == 0 {
		return phrase
	}

	for i, word := range words {
		lowerWord := strings.ToLower(word)

		switch {
		case commonAcronyms[lowerWord]:
			words[i] = strings.ToUpper(word)
		case i == 0 || !lowercaseWords[lowerWord]:
			// First word, and anything that isn't an article/preposition.
			words[i] = titleCase(word)
		default:
			// Keep articles/prepositions lowercase (unless first word).
			words[i] = strings.ToLower(word)
		}
	}

	return strings.Join(words, " ")
}

// titleCase converts a word to title case. Existing mixed casing (e.g.
// "McDonald", "iPhone") is preserved, and each hyphen-separated segment
// is capitalized ("short-term" -> "Short-Term") — this was previously
// missing, so "short-term memory loss" failed its own test expectation
// of "Short-Term Memory Loss".
func titleCase(s string) string {
	if len(s) == 0 {
		return s
	}

	hasLower := false
	hasUpper := false
	for _, r := range s {
		if unicode.IsLower(r) {
			hasLower = true
		}
		if unicode.IsUpper(r) {
			hasUpper = true
		}
	}

	// If mixed case, preserve it.
	if hasLower && hasUpper {
		return s
	}

	// Otherwise title-case each hyphen-separated segment.
	parts := strings.Split(strings.ToLower(s), "-")
	for i, part := range parts {
		if part == "" {
			continue
		}
		runes := []rune(part)
		runes[0] = unicode.ToUpper(runes[0])
		parts[i] = string(runes)
	}
	return strings.Join(parts, "-")
}

// NormalizeKeywords normalizes a list of keywords, dropping entries that
// become duplicates (case-insensitively) after normalization. Order of
// first occurrence is preserved.
func NormalizeKeywords(keywords []string) []string {
	normalized := make([]string, 0, len(keywords))
	seen := make(map[string]bool, len(keywords))

	for _, keyword := range keywords {
		norm := NormalizeKeyword(keyword)

		normLower := strings.ToLower(norm)
		if !seen[normLower] {
			normalized = append(normalized, norm)
			seen[normLower] = true
		}
	}

	return normalized
}

// CleanDuplicateKeywords removes old unnormalized versions when normalized versions are present.
// This helps clean up libraries that have both "sci-fi" and "Sci-Fi" after normalization.
// Current keywords that do not normalize into one of newNormalizedKeywords
// (e.g. manual user tags) are preserved; new keywords are appended last.
func CleanDuplicateKeywords(currentKeywords, newNormalizedKeywords []string) []string {
	// Map each normalized keyword (lowercased) to its proper form.
	normalizedMap := make(map[string]string, len(newNormalizedKeywords))
	for _, keyword := range newNormalizedKeywords {
		normalizedMap[strings.ToLower(keyword)] = keyword
	}

	// Mark current keywords that an incoming normalized form supersedes.
	toRemove := make(map[string]bool)
	for _, current := range currentKeywords {
		normalized := NormalizeKeyword(current)
		normalizedLower := strings.ToLower(normalized)

		// If the normalized version exists in our new keywords and differs
		// from the current spelling, the old spelling is replaced.
		if properForm, exists := normalizedMap[normalizedLower]; exists && current != properForm {
			toRemove[current] = true
		}
	}

	var cleaned []string
	seen := make(map[string]bool)

	// Keep all current keywords that aren't being replaced.
	for _, keyword := range currentKeywords {
		lowerKeyword := strings.ToLower(keyword)
		if !toRemove[keyword] && !seen[lowerKeyword] {
			cleaned = append(cleaned, keyword)
			seen[lowerKeyword] = true
		}
	}

	// Then append all new normalized keywords.
	for _, keyword := range newNormalizedKeywords {
		lowerKeyword := strings.ToLower(keyword)
		if !seen[lowerKeyword] {
			cleaned = append(cleaned, keyword)
			seen[lowerKeyword] = true
		}
	}

	return cleaned
}
+func TestNormalizeKeyword(t *testing.T) { + tests := []struct { + input string + expected string + }{ + // Basic capitalization + {"action", "Action"}, + {"science fiction", "Science Fiction"}, + {"drama", "Drama"}, + + // Acronyms + {"fbi", "FBI"}, + {"cia", "CIA"}, + {"usa", "USA"}, + {"3d", "3D"}, + {"ai", "AI"}, + {"cgi", "CGI"}, + + // Critical replacements (hardcoded) + {"sci-fi", "Sci-Fi"}, + {"scifi", "Sci-Fi"}, + {"sci fi", "Sci-Fi"}, + {"bio-pic", "Biopic"}, + {"romcom", "Romantic Comedy"}, + {"neo-noir", "Neo-Noir"}, + {"duringcreditsstinger", "During Credits Stinger"}, + {"aftercreditsstinger", "After Credits Stinger"}, + + // Pattern-based: decades + {"1940s", "1940s"}, + {"1990s", "1990s"}, + + // Pattern-based: city, state + {"san francisco, california", "San Francisco, California"}, + {"new york, new york", "New York, New York"}, + + // Pattern-based: vs patterns + {"man vs nature", "Man vs Nature"}, + {"good vs evil", "Good vs Evil"}, + + // Pattern-based: based on + {"based on novel", "Based on Novel"}, + {"based on comic book", "Based on Comic Book"}, + {"based on short story", "Based on Short Story"}, + + // Pattern-based: relationships + {"father daughter", "Father Daughter Relationship"}, + {"father daughter relationship", "Father Daughter Relationship"}, + {"mother son", "Mother Son Relationship"}, + + // Pattern-based: ethnicity + {"african american lead", "African American Lead"}, + {"asian american character", "Asian American Character"}, + + // Pattern-based: acronyms in parentheses + {"central intelligence agency (cia)", "Central Intelligence Agency (CIA)"}, + {"artificial intelligence (a.i.)", "Artificial Intelligence (A.I.)"}, + {"united states (u.s.)", "United States (U.S.)"}, + + // Pattern-based: agency/organization roles + {"dea agent", "DEA Agent"}, + {"fbi director", "FBI Director"}, + {"cia operative", "CIA Operative"}, + {"nsa analyst", "NSA Analyst"}, + + // Pattern-based: centuries + {"5th century bc", "5th Century BC"}, + 
{"10th century", "10th Century"}, + {"21st century", "21st Century"}, + + // General title casing + {"car accident", "Car Accident"}, + {"crash landing", "Crash Landing"}, + {"giant monster", "Giant Monster"}, + {"alien race", "Alien Race"}, + {"dysfunctional relationship", "Dysfunctional Relationship"}, + {"short-term memory loss", "Short-Term Memory Loss"}, + {"screwball comedy", "Screwball Comedy"}, + {"tough cop", "Tough Cop"}, + {"fake fight", "Fake Fight"}, + {"racial segregation", "Racial Segregation"}, + {"racial tension", "Racial Tension"}, + {"racial prejudice", "Racial Prejudice"}, + {"high tech", "High Tech"}, + {"true love", "True Love"}, + {"brooklyn dodgers", "Brooklyn Dodgers"}, + + // Articles and prepositions + {"woman in peril", "Woman in Peril"}, + {"man of the house", "Man of the House"}, + {"tale of two cities", "Tale of Two Cities"}, + {"lord of the rings", "Lord of the Rings"}, + + // Mixed case preservation + {"McDonald", "McDonald"}, + {"iPhone", "iPhone"}, + {"eBay", "eBay"}, + + // Edge cases + {"", ""}, + {"a", "A"}, + {"THE", "The"}, + {"and", "And"}, + } + + for _, test := range tests { + result := NormalizeKeyword(test.input) + if result != test.expected { + t.Errorf("NormalizeKeyword(%q) = %q, expected %q", test.input, result, test.expected) + } + } +} + +// TestNormalizeKeywords tests the batch normalization functionality +// It ensures duplicates are removed after normalization and that +// all keywords are properly processed +func TestNormalizeKeywords(t *testing.T) { + input := []string{ + "action", + "sci-fi", + "fbi", + "based on novel", + "time travel", + "woman in peril", + "action", // duplicate + "ACTION", // duplicate but different case + } + + expected := []string{ + "Action", + "Sci-Fi", + "FBI", + "Based on Novel", + "Time Travel", + "Woman in Peril", + // duplicates should be removed + } + + result := NormalizeKeywords(input) + + if len(result) != len(expected) { + t.Errorf("Expected %d keywords, got %d", 
len(expected), len(result)) + } + + for i, exp := range expected { + if i >= len(result) || result[i] != exp { + t.Errorf("Expected keyword %d to be %q, got %q", i, exp, result[i]) + } + } +} + +// TestCleanDuplicateKeywords tests the duplicate cleaning functionality +// This ensures old unnormalized versions are removed when normalized versions are present +func TestCleanDuplicateKeywords(t *testing.T) { + tests := []struct { + name string + currentKeywords []string + newNormalizedKeywords []string + expected []string + }{ + { + name: "Remove old sci-fi variants", + currentKeywords: []string{"Action", "sci-fi", "Drama", "Custom Tag"}, + newNormalizedKeywords: []string{"Sci-Fi", "Time Travel"}, + expected: []string{"Action", "Drama", "Custom Tag", "Sci-Fi", "Time Travel"}, + }, + { + name: "Remove multiple duplicates", + currentKeywords: []string{"fbi", "cia", "action", "romcom", "Custom Label"}, + newNormalizedKeywords: []string{"FBI", "CIA", "Action", "Romantic Comedy"}, + expected: []string{"Custom Label", "FBI", "CIA", "Action", "Romantic Comedy"}, + }, + { + name: "Preserve manual keywords", + currentKeywords: []string{"My Custom Tag", "sci-fi", "Watched", "4K"}, + newNormalizedKeywords: []string{"Sci-Fi", "Adventure"}, + expected: []string{"My Custom Tag", "Watched", "4K", "Sci-Fi", "Adventure"}, + }, + { + name: "Handle agency patterns", + currentKeywords: []string{"dea agent", "fbi director", "Drama"}, + newNormalizedKeywords: []string{"DEA Agent", "FBI Director"}, + expected: []string{"Drama", "DEA Agent", "FBI Director"}, + }, + { + name: "No duplicates to clean", + currentKeywords: []string{"Action", "Drama", "My Tag"}, + newNormalizedKeywords: []string{"Sci-Fi", "Adventure"}, + expected: []string{"Action", "Drama", "My Tag", "Sci-Fi", "Adventure"}, + }, + { + name: "Complex normalization patterns", + currentKeywords: []string{"central intelligence agency (cia)", "5th century bc", "Custom"}, + newNormalizedKeywords: []string{"Central Intelligence Agency 
(CIA)", "5th Century BC"}, + expected: []string{"Custom", "Central Intelligence Agency (CIA)", "5th Century BC"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + result := CleanDuplicateKeywords(test.currentKeywords, test.newNormalizedKeywords) + + if len(result) != len(test.expected) { + t.Errorf("Expected %d keywords, got %d", len(test.expected), len(result)) + t.Errorf("Expected: %v", test.expected) + t.Errorf("Got: %v", result) + return + } + + // Convert to maps for easier comparison since order might vary + expectedMap := make(map[string]bool) + for _, keyword := range test.expected { + expectedMap[keyword] = true + } + + resultMap := make(map[string]bool) + for _, keyword := range result { + resultMap[keyword] = true + } + + for keyword := range expectedMap { + if !resultMap[keyword] { + t.Errorf("Expected keyword %q not found in result", keyword) + } + } + + for keyword := range resultMap { + if !expectedMap[keyword] { + t.Errorf("Unexpected keyword %q found in result", keyword) + } + } + }) + } +} \ No newline at end of file