From 008dc8cc95ed9d922d7bf3c5bb5f1f3bcfb5d76e Mon Sep 17 00:00:00 2001 From: "nullable.eth" <2248325+nullable-eth@users.noreply.github.com> Date: Wed, 2 Jul 2025 20:31:49 -0400 Subject: [PATCH] feature: update regex to increase matching pattern --- Makefile | 84 ++++++++++ README.md | 96 ++++++++--- internal/media/processor.go | 6 +- internal/media/processor_test.go | 274 +++++++++++++++++++++++++++++++ 4 files changed, 438 insertions(+), 22 deletions(-) create mode 100644 Makefile create mode 100644 internal/media/processor_test.go diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d493cdc --- /dev/null +++ b/Makefile @@ -0,0 +1,84 @@ +# Makefile for labelarr + +# Go parameters +GOCMD=go +GOBUILD=$(GOCMD) build +GOCLEAN=$(GOCMD) clean +GOTEST=$(GOCMD) test +GOGET=$(GOCMD) get +GOMOD=$(GOCMD) mod +BINARY_NAME=labelarr +BINARY_PATH=./cmd/labelarr + +# Build the application +.PHONY: build +build: + $(GOBUILD) -o $(BINARY_NAME) $(BINARY_PATH) + +# Run tests +.PHONY: test +test: + $(GOTEST) -v ./... + +# Run tests with coverage +.PHONY: test-coverage +test-coverage: + $(GOTEST) -v -cover ./... + +# Run tests with coverage report +.PHONY: test-coverage-html +test-coverage-html: + $(GOTEST) -v -coverprofile=coverage.out ./... + $(GOCMD) tool cover -html=coverage.out -o coverage.html + @echo "Coverage report generated: coverage.html" + +# Run benchmarks +.PHONY: benchmark +benchmark: + $(GOTEST) -bench=. -benchmem ./... 
+ +# Clean build artifacts +.PHONY: clean +clean: + $(GOCLEAN) + rm -f $(BINARY_NAME) + rm -f coverage.out coverage.html + +# Download dependencies +.PHONY: deps +deps: + $(GOMOD) download + $(GOMOD) tidy + +# Run linter (requires golangci-lint to be installed) +.PHONY: lint +lint: + golangci-lint run + +# Run the application +.PHONY: run +run: build + ./$(BINARY_NAME) + +# Build for multiple platforms +.PHONY: build-all +build-all: + GOOS=linux GOARCH=amd64 $(GOBUILD) -o $(BINARY_NAME)-linux-amd64 $(BINARY_PATH) + GOOS=windows GOARCH=amd64 $(GOBUILD) -o $(BINARY_NAME)-windows-amd64.exe $(BINARY_PATH) + GOOS=darwin GOARCH=amd64 $(GOBUILD) -o $(BINARY_NAME)-darwin-amd64 $(BINARY_PATH) + +# Help target +.PHONY: help +help: + @echo "Available targets:" + @echo " build - Build the application" + @echo " test - Run all tests" + @echo " test-coverage - Run tests with coverage" + @echo " test-coverage-html - Generate HTML coverage report" + @echo " benchmark - Run benchmark tests" + @echo " clean - Clean build artifacts" + @echo " deps - Download and tidy dependencies" + @echo " lint - Run linter" + @echo " run - Build and run the application" + @echo " build-all - Build for multiple platforms" + @echo " help - Show this help message" \ No newline at end of file diff --git a/README.md b/README.md index 9aa45ab..2ac9064 100644 --- a/README.md +++ b/README.md @@ -150,35 +150,59 @@ services: The application can find TMDb IDs from multiple sources and supports flexible formats: - **Plex Metadata**: Standard TMDb agent IDs -- **File Paths**: `{tmdb-12345}` in filenames or directory names -- **Flexible Formats**: The TMDb ID can be detected in a variety of patterns, not just `{tmdb-12345}`. Supported patterns include: - - `{tmdb-12345}` (curly braces, anywhere in the folder or file name) - - `[tmdb-12345]` (square brackets) - - `(tmdb-12345)` (parentheses) - - `tmdb-12345` (standalone, with or without delimiters) - - Case-insensitive: `TMDB-12345`, `Tmdb-12345`, etc. 
- - The TMDb ID can appear in either the directory or file name, and can be surrounded by spaces or other characters. - - **Delimiters**: The TMDb ID pattern supports all common delimiters (such as `:`, `;`, `-`, `_`, etc.) between `tmdb` and the ID. For example: - - `tmdb:15448` - - `tmdb;15448` - - `tmdb-15448` - - `tmdb_15448` - - `tmdb: 15448`, `tmdb- 15448`, etc. - - These can appear in any of the supported bracket/brace/parenthesis formats or standalone. - - The pattern will **not** match `tmdb15448` (no separator). - -Example file paths: +- **File Paths**: Flexible TMDb ID detection in filenames or directory names + +### ✅ **Supported Patterns** (Case-Insensitive) + +The TMDb ID detection is very flexible and supports various formats: + +**Direct Concatenation:** + +- `/movies/The Matrix (1999) tmdb603/file.mkv` +- `/movies/Inception (2010) TMDB27205/file.mkv` +- `/movies/Avatar (2009) Tmdb19995/file.mkv` + +**With Separators:** + +- `/movies/Interstellar (2014) tmdb:157336/file.mkv` +- `/movies/The Dark Knight (2008) tmdb-155/file.mkv` +- `/movies/Pulp Fiction (1994) tmdb_680/file.mkv` +- `/movies/Fight Club (1999) tmdb=550/file.mkv` +- `/movies/The Shawshank Redemption (1994) tmdb 278/file.mkv` + +**With Brackets/Braces:** + +- `/movies/Goodfellas (1990) {tmdb634}/file.mkv` +- `/movies/Forrest Gump (1994) [tmdb-13]/file.mkv` +- `/movies/The Godfather (1972) (tmdb:238)/file.mkv` +- `/movies/Taxi Driver (1976) {tmdb=103}/file.mkv` +- `/movies/Casablanca (1942) (tmdb 289)/file.mkv` + +**Mixed Examples:** + +- `/movies/Citizen Kane (1941) something tmdb: 15678 extra/file.mkv` +- `/movies/Vertigo (1958) {tmdb=194884}/file.mkv` +- `/movies/Psycho (1960) [ tmdb-539 ]/file.mkv` + +### ❌ **Will NOT Match** + +- `mytmdb12345` (preceded by alphanumeric characters) +- `tmdb12345abc` (followed by alphanumeric characters) +- `tmdb` (no digits following) + +### 📁 **Example File Paths** ``` /movies/The Matrix (1999) [tmdb-603]/The Matrix.mkv /movies/Inception (2010) 
(tmdb:27205)/Inception.mkv -/movies/Avatar (2009) tmdb;19995/Avatar.mkv +/movies/Avatar (2009) tmdb19995/Avatar.mkv /movies/Interstellar (2014) TMDB_157336/Interstellar.mkv -/movies/Edge Case - {tmdb-12345}/file.mkv +/movies/Edge Case - {tmdb=12345}/file.mkv /movies/Colon: [tmdb:54321]/file.mkv /movies/Semicolon; (tmdb;67890)/file.mkv /movies/Underscore_tmdb_11111/file.mkv /movies/ExtraSuffix tmdb-22222_extra/file.mkv +/movies/Direct tmdb194884 format/file.mkv ``` @@ -523,6 +547,38 @@ If you have an existing movie library without TMDb IDs in file paths: **⚠️ Note**: Large libraries may take time to rename. Consider doing this in batches during low-usage periods. +### 📺 Sonarr Users: Renaming Existing Folders to Include TMDb ID + +If you're using Sonarr to manage your TV show collection and want to apply new folder naming that includes TMDb IDs, here's how to rename existing folders: + +#### **🔄 Apply the New Folder Names** + +To actually rename existing folders: + +1. **Go to the Series tab** + +2. **Click the Mass Editor** (three sliders icon) + +3. **Select the shows** you want to rename + +4. **At the bottom, click "Edit"** + +5. **In the popup:** + - Set the **Root Folder** to the same one it's already using (e.g., `/mnt/user/TV`) + - Click **"Save"** + +6. **Sonarr will interpret this as a move** and apply the new folder naming format without physically moving the files—just renaming the folders. + +#### **Example Result** + +After applying the new naming format, your TV show folders will include TMDb IDs: + +``` +/tv/Batman [tmdb-2287]/Season 3/Batman - S03E17 - The Joke's on Catwoman Bluray-1080p [tmdb-2287].mkv +``` + +**💡 Pro Tip**: This method works for renaming folders without actually moving files, making it safe and efficient for large TV libraries. +
diff --git a/internal/media/processor.go b/internal/media/processor.go
index 3f0da13..dca28be 100644
--- a/internal/media/processor.go
+++ b/internal/media/processor.go
@@ -498,8 +498,13 @@ func (p *Processor) extractTVShowTMDbID(item MediaItem) string {
 
+// tmdbIDPattern finds a TMDb ID in a path: "tmdb" in any letter case, an
+// optional run of non-alphanumeric separators, then the digits, with the whole
+// token delimited by non-alphanumeric characters or the start/end of the string.
+// Examples: tmdb123, tmdb:123, {tmdb-456}, [tmdb=789], tmdb_012.
+// It is compiled once at package scope instead of on every call.
+var tmdbIDPattern = regexp.MustCompile(`(?i)(?:^|[^a-zA-Z0-9])tmdb[^a-zA-Z0-9]*(\d+)(?:[^a-zA-Z0-9]|$)`)
+
 // ExtractTMDbIDFromPath extracts TMDb ID from file path using regex
 func ExtractTMDbIDFromPath(filePath string) string {
-	// Updated regex pattern to match {tmdb-123456} anywhere in the path
-	re := regexp.MustCompile(`\{tmdb-(\d+)\}`)
-	matches := re.FindStringSubmatch(filePath)
+	matches := tmdbIDPattern.FindStringSubmatch(filePath)
 	if len(matches) > 1 {
 		return matches[1]
diff --git a/internal/media/processor_test.go b/internal/media/processor_test.go
new file mode 100644
index 0000000..4fc3a01
--- /dev/null
+++ b/internal/media/processor_test.go
@@ -0,0 +1,274 @@
+package media
+
+import "testing"
+
+func TestExtractTMDbIDFromPath(t *testing.T) {
+	tests := []struct {
+		name     string
+		path     string
+		expected string
+	}{
+		// Direct Concatenation
+		{
+			name:     "Direct concatenation lowercase",
+			path:     "/movies/The Matrix (1999) tmdb603/file.mkv",
+			expected: "603",
+		},
+		{
+			name:     "Direct concatenation uppercase",
+			path:     "/movies/Inception (2010) TMDB27205/file.mkv",
+			expected: "27205",
+		},
+		{
+			name:     "Direct concatenation mixed case",
+			path:     "/movies/Avatar (2009) Tmdb19995/file.mkv",
+			expected: "19995",
+		},
+
+		// With Separators
+		{
+			name:     "Colon separator",
+			path:     "/movies/Interstellar (2014) tmdb:157336/file.mkv",
+			expected: "157336",
+		},
+		{
+			name:     "Dash separator",
+			path:     "/movies/The Dark Knight (2008) tmdb-155/file.mkv",
+			expected: "155",
+		},
+		{
+			name:     "Underscore separator",
+			path:     "/movies/Pulp Fiction (1994) tmdb_680/file.mkv",
+			expected: "680",
+		},
+		{
+			name:     "Equals separator",
+			path:     "/movies/Fight Club (1999) tmdb=550/file.mkv",
+			expected: "550",
+		},
+		{
+			name:     "Space separator",
+			path:     "/movies/The Shawshank Redemption (1994) tmdb 278/file.mkv",
+			expected: "278",
+		},
+
+		// With Brackets/Braces
+		{
+			name:     "Curly braces",
+			path:     "/movies/Goodfellas (1990) {tmdb634}/file.mkv",
+			expected: "634",
+		},
+		{
+			name:     "Square brackets with dash",
+			path:     "/movies/Forrest Gump (1994) [tmdb-13]/file.mkv",
+			expected: "13",
+		},
+		{
+			name:     "Parentheses with colon",
+			path:     "/movies/The Godfather (1972) (tmdb:238)/file.mkv",
+			expected: "238",
+		},
+		{
+			name:     "Curly braces with equals",
+			path:     "/movies/Taxi Driver (1976) {tmdb=103}/file.mkv",
+			expected: "103",
+		},
+		{
+			name:     "Parentheses with space",
+			path:     "/movies/Casablanca (1942) (tmdb 289)/file.mkv",
+			expected: "289",
+		},
+
+		// Mixed Examples
+		{
+			name:     "Mixed with extra text",
+			path:     "/movies/Citizen Kane (1941) something tmdb: 15678 extra/file.mkv",
+			expected: "15678",
+		},
+		{
+			name:     "Curly braces with equals complex",
+			path:     "/movies/Vertigo (1958) {tmdb=194884}/file.mkv",
+			expected: "194884",
+		},
+		{
+			name:     "Brackets with spaces",
+			path:     "/movies/Psycho (1960) [ tmdb-539 ]/file.mkv",
+			expected: "539",
+		},
+
+		// Original README Examples (Backward Compatibility)
+		{
+			name:     "Original bracket format",
+			path:     "/movies/The Matrix (1999) [tmdb-603]/The Matrix.mkv",
+			expected: "603",
+		},
+		{
+			name:     "Original parentheses format",
+			path:     "/movies/Inception (2010) (tmdb:27205)/Inception.mkv",
+			expected: "27205",
+		},
+		{
+			name:     "Original direct format",
+			path:     "/movies/Avatar (2009) tmdb19995/Avatar.mkv",
+			expected: "19995",
+		},
+		{
+			name:     "Original uppercase underscore",
+			path:     "/movies/Interstellar (2014) TMDB_157336/Interstellar.mkv",
+			expected: "157336",
+		},
+
+		// Edge Cases - Multiple TMDb IDs (should match first)
+		{
+			name:     "Multiple TMDb IDs - matches first",
+			path:     "/movies/Movie tmdb123 and tmdb456/file.mkv",
+			expected: "123",
+		},
+		{
+			name:     "TMDb ID in directory and filename",
+			path:     "/movies/Movie tmdb123/filename tmdb456.mkv",
+			expected: "123",
+		},
+
+		// Complex Real-World Examples
+		{
+			name:     "Complex path with year and quality",
+			path:     "/media/Movies/The Matrix (1999) [1080p] {tmdb-603} [x264]/The.Matrix.1999.1080p.BluRay.x264.mkv",
+			expected: "603",
+		},
+		{
+			name:     "Radarr style naming",
+			path:     "/movies/Inception (2010) {tmdb-27205} [Bluray-1080p][x264][DTS 5.1]-GROUP/Inception.mkv",
+			expected: "27205",
+		},
+
+		// Should NOT Match Cases
+		{
+			name:     "Should not match - preceded by alphanumeric",
+			path:     "mytmdb12345",
+			expected: "",
+		},
+		{
+			name:     "Should not match - followed by alphanumeric",
+			path:     "tmdb12345abc",
+			expected: "",
+		},
+		{
+			name:     "Should not match - no digits",
+			path:     "tmdb",
+			expected: "",
+		},
+		{
+			name:     "Should not match - no digits after tmdb",
+			path:     "/movies/My Favorite tmdb Movie/file.mkv",
+			expected: "",
+		},
+		{
+			name:     "Should not match - embedded in word",
+			path:     "/movies/sometmdbmovie123/file.mkv",
+			expected: "",
+		},
+		{
+			name:     "Should not match - tmdb without proper boundary",
+			path:     "/movies/notmdb123/file.mkv",
+			expected: "",
+		},
+
+		// Case Insensitive Tests
+		{
+			name:     "Mixed case TMDB",
+			path:     "/movies/Movie (2020) TmDb12345/file.mkv",
+			expected: "12345",
+		},
+		{
+			name:     "All caps TMDB",
+			path:     "/movies/Movie (2020) TMDB12345/file.mkv",
+			expected: "12345",
+		},
+
+		// Special Characters and Unicode
+		{
+			name:     "Path with special characters",
+			path:     "/movies/Café & Bar (2020) tmdb12345/file.mkv",
+			expected: "12345",
+		},
+		{
+			name:     "Path with unicode",
+			path:     "/movies/Crème Brûlée (2020) tmdb12345/file.mkv",
+			expected: "12345",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ExtractTMDbIDFromPath(tt.path)
+			if result != tt.expected {
+				t.Errorf("ExtractTMDbIDFromPath(%q) = %q, want %q", tt.path, result, tt.expected)
+			}
+		})
+	}
+}
+
+// Benchmark test to ensure the regex is performant
+func BenchmarkExtractTMDbIDFromPath(b *testing.B) {
+	testPath := "/movies/The Matrix (1999) [tmdb-603]/The Matrix.mkv"
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		ExtractTMDbIDFromPath(testPath)
+	}
+}
+
+// Test with empty and edge case inputs
+func TestExtractTMDbIDFromPathEdgeCases(t *testing.T) {
+	edgeCases := []struct {
+		name     string
+		path     string
+		expected string
+	}{
+		{
+			name:     "Empty string",
+			path:     "",
+			expected: "",
+		},
+		{
+			name:     "Only filename",
+			path:     "tmdb123.mkv",
+			expected: "123",
+		},
+		{
+			name:     "Root path",
+			path:     "/tmdb123",
+			expected: "123",
+		},
+		{
+			name:     "Windows path",
+			path:     "C:\\Movies\\Movie tmdb123\\file.mkv",
+			expected: "123",
+		},
+		{
+			name:     "Very long TMDb ID",
+			path:     "/movies/Movie tmdb123456789012345/file.mkv",
+			expected: "123456789012345",
+		},
+		{
+			name:     "TMDb ID at start of path",
+			path:     "tmdb123/movies/file.mkv",
+			expected: "123",
+		},
+		{
+			name:     "TMDb ID at end of path",
+			path:     "/movies/file tmdb123",
+			expected: "123",
+		},
+	}
+
+	for _, tt := range edgeCases {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ExtractTMDbIDFromPath(tt.path)
+			if result != tt.expected {
+				t.Errorf("ExtractTMDbIDFromPath(%q) = %q, want %q", tt.path, result, tt.expected)
+			}
+		})
+	}
+}