diff --git a/docs/specs/SPEC-noise-file-detection.md b/docs/specs/SPEC-noise-file-detection.md index 0d56cabf..2405a3d8 100644 --- a/docs/specs/SPEC-noise-file-detection.md +++ b/docs/specs/SPEC-noise-file-detection.md @@ -15,10 +15,13 @@ The runtime source of truth is the embedded JSON resource: | `thumbs.db` | Windows | Thumbnail cache | | `ehthumbs.db` | Windows | Media Center thumbnail cache | | `ehthumbs_vista.db` | Windows | Vista Media Center thumbnail cache | +| `$RECYCLE.BIN` | Windows | Recycle bin folder marker | | `.desktop.ini` | Windows/Linux legacy compatibility | Legacy hidden variant | | `.thumbs.db` | Windows/Linux legacy compatibility | Legacy hidden variant | | `.DS_Store` | macOS | Finder metadata | | `.AppleDouble` | macOS | Resource fork metadata | +| `.AppleDB` | macOS | Apple database file | +| `.AppleDesktop` | macOS | Apple desktop database file | | `.LSOverride` | macOS | Launch Services overrides | | `.Spotlight-V100` | macOS | Spotlight indexing data | | `.Trashes` | macOS | Trash metadata or folder marker | @@ -30,4 +33,4 @@ The runtime source of truth is the embedded JSON resource: ## Matching behavior - On Linux, matching is case-sensitive. -- On non-Linux platforms, matching is case-insensitive. +- On non-Linux platforms (including macOS), matching is case-insensitive; macOS remains intentionally case-insensitive for consistency with current product behavior and to reflect the deviation from the original issue wording. diff --git a/src/ByteSync.Client/Interfaces/Controls/Inventories/IFileSystemInspector.cs b/src/ByteSync.Client/Interfaces/Controls/Inventories/IFileSystemInspector.cs index bd08696d..3fbd3312 100644 --- a/src/ByteSync.Client/Interfaces/Controls/Inventories/IFileSystemInspector.cs +++ b/src/ByteSync.Client/Interfaces/Controls/Inventories/IFileSystemInspector.cs @@ -14,6 +14,8 @@ public interface IFileSystemInspector bool IsNoiseFileName(FileInfo fileInfo, OSPlatforms os); + bool IsNoiseDirectoryName(DirectoryInfo directoryInfo, OSPlatforms os); + bool IsReparsePoint(FileSystemInfo fsi); bool Exists(FileInfo fileInfo); @@ -21,4 +23,4 @@ public interface IFileSystemInspector bool IsOffline(FileInfo fileInfo); bool IsRecallOnDataAccess(FileInfo fileInfo); -} \ No newline at end of file +} diff --git a/src/ByteSync.Client/Models/Inventories/SkipReason.cs b/src/ByteSync.Client/Models/Inventories/SkipReason.cs index d065e5c9..449b1e91 100644 --- a/src/ByteSync.Client/Models/Inventories/SkipReason.cs +++ b/src/ByteSync.Client/Models/Inventories/SkipReason.cs @@ -5,7 +5,7 @@ public enum SkipReason Unknown = 0, Hidden = 1, SystemAttribute = 2, - NoiseFile = 3, + NoiseEntry = 3, Symlink = 4, SpecialPosixFile = 5, Offline = 6, diff --git a/src/ByteSync.Client/Services/Inventories/FileSystemInspector.cs b/src/ByteSync.Client/Services/Inventories/FileSystemInspector.cs index 47e33b74..911d149a 100644 --- a/src/ByteSync.Client/Services/Inventories/FileSystemInspector.cs +++ b/src/ByteSync.Client/Services/Inventories/FileSystemInspector.cs @@ -66,6 +66,11 @@ public bool IsNoiseFileName(FileInfo fileInfo, OSPlatforms os) return NoiseFileDetector.IsNoiseFileName(fileInfo.Name, os); } + public bool IsNoiseDirectoryName(DirectoryInfo directoryInfo, OSPlatforms os) + { + return NoiseFileDetector.IsNoiseFileName(directoryInfo.Name, os); + } + public bool IsReparsePoint(FileSystemInfo fsi) { return (fsi.Attributes & FileAttributes.ReparsePoint) == FileAttributes.ReparsePoint; diff --git a/src/ByteSync.Client/Services/Inventories/InventoryBuilder.cs b/src/ByteSync.Client/Services/Inventories/InventoryBuilder.cs index cd673e30..3c03da85 100644 --- a/src/ByteSync.Client/Services/Inventories/InventoryBuilder.cs +++ b/src/ByteSync.Client/Services/Inventories/InventoryBuilder.cs @@ -480,13 +480,22 @@ private void DoAnalyze(InventoryPart inventoryPart, DirectoryInfo directoryInfo, return; } - if (!IsRootPath(inventoryPart, directoryInfo) && ShouldIgnoreHiddenDirectory(directoryInfo)) + var isRoot = IsRootPath(inventoryPart, directoryInfo); + + if (!isRoot && ShouldIgnoreHiddenDirectory(directoryInfo)) { RecordSkippedEntry(inventoryPart, directoryInfo, SkipReason.Hidden, FileSystemEntryKind.Directory); return; } + if (!isRoot && ShouldIgnoreNoiseDirectory(directoryInfo)) + { + RecordSkippedEntry(inventoryPart, directoryInfo, SkipReason.NoiseEntry, FileSystemEntryKind.Directory); + + return; + } + var directoryDescription = IdentityBuilder.BuildDirectoryDescription(inventoryPart, directoryInfo); AddFileSystemDescription(inventoryPart, directoryDescription); @@ -535,6 +544,23 @@ private bool ShouldIgnoreHiddenFile(FileInfo fileInfo) return false; } + private bool ShouldIgnoreNoiseDirectory(DirectoryInfo directoryInfo) + { + if (!IgnoreSystem) + { + return false; + } + + if (FileSystemInspector.IsNoiseDirectoryName(directoryInfo, OSPlatform)) + { + _logger.LogInformation("Directory {Directory} is ignored because considered as noise", directoryInfo.FullName); + + return true; + } + + return false; + } + private SkipReason? GetSystemSkipReason(FileInfo fileInfo) { if (!IgnoreSystem) @@ -546,7 +572,7 @@ private bool ShouldIgnoreHiddenFile(FileInfo fileInfo) { _logger.LogInformation("File {File} is ignored because considered as noise", fileInfo.FullName); - return SkipReason.NoiseFile; + return SkipReason.NoiseEntry; } if (FileSystemInspector.IsSystemAttribute(fileInfo)) @@ -705,4 +731,4 @@ private void AddFileSystemDescription(InventoryPart inventoryPart, FileSystemDes } } } -} \ No newline at end of file +} diff --git a/src/ByteSync.Client/Services/Inventories/noise-files.json b/src/ByteSync.Client/Services/Inventories/noise-files.json index 22093319..008eadb1 100644 --- a/src/ByteSync.Client/Services/Inventories/noise-files.json +++ b/src/ByteSync.Client/Services/Inventories/noise-files.json @@ -3,10 +3,13 @@ "thumbs.db", "ehthumbs.db", "ehthumbs_vista.db", + "$RECYCLE.BIN", ".desktop.ini", ".thumbs.db", ".DS_Store", ".AppleDouble", + ".AppleDB", + ".AppleDesktop", ".LSOverride", ".Spotlight-V100", ".Trashes", diff --git a/tests/ByteSync.Client.UnitTests/Services/Inventories/FileSystemInspectorTests.cs b/tests/ByteSync.Client.UnitTests/Services/Inventories/FileSystemInspectorTests.cs index f69d72c2..6a61e25c 100644 --- a/tests/ByteSync.Client.UnitTests/Services/Inventories/FileSystemInspectorTests.cs +++ b/tests/ByteSync.Client.UnitTests/Services/Inventories/FileSystemInspectorTests.cs @@ -133,6 +133,44 @@ public void ClassifyEntry_FallsBackToRegularFile_WhenPosixClassifierThrows() } } + [Test] + public void IsNoiseDirectoryName_ShouldReturnTrue_ForKnownNoiseDirectory() + { + var inspector = new FileSystemInspector(); + var tempDirectory = Directory.CreateDirectory(Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"))); + var noiseDirectory = Directory.CreateDirectory(Path.Combine(tempDirectory.FullName, "$RECYCLE.BIN")); + + try + { + var result = inspector.IsNoiseDirectoryName(noiseDirectory, OSPlatforms.Windows); + + result.Should().BeTrue(); + } + finally + { + Directory.Delete(tempDirectory.FullName, true); + } + } + + [Test] + public void IsNoiseDirectoryName_ShouldReturnFalse_ForUnknownDirectory() + { + var inspector = new FileSystemInspector(); + var tempDirectory = Directory.CreateDirectory(Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"))); + var regularDirectory = Directory.CreateDirectory(Path.Combine(tempDirectory.FullName, "regular")); + + try + { + var result = inspector.IsNoiseDirectoryName(regularDirectory, OSPlatforms.Windows); + + result.Should().BeFalse(); + } + finally + { + Directory.Delete(tempDirectory.FullName, true); + } + } + [Test] public void IsNoiseFileName_ShouldReturnTrue_ForKnownNoiseFile() { diff --git a/tests/ByteSync.Client.UnitTests/Services/Inventories/InventoryBuilderInspectorTests.cs b/tests/ByteSync.Client.UnitTests/Services/Inventories/InventoryBuilderInspectorTests.cs index 3ac31b4c..33d40338 100644 --- a/tests/ByteSync.Client.UnitTests/Services/Inventories/InventoryBuilderInspectorTests.cs +++ b/tests/ByteSync.Client.UnitTests/Services/Inventories/InventoryBuilderInspectorTests.cs @@ -94,6 +94,9 @@ private static void SetupDefaultClassification(Mock inspec FileInfo => FileSystemEntryKind.RegularFile, _ => FileSystemEntryKind.Unknown }); + inspector + .Setup(i => i.IsNoiseDirectoryName(It.IsAny(), It.IsAny())) + .Returns(false); } [Test] @@ -265,7 +268,72 @@ public async Task Noise_Child_File_Is_Recorded() await builder.BuildBaseInventoryAsync(invPath); processData.SkippedEntries.Should() - .ContainSingle(e => e.Name == "thumbs.db" && e.Reason == SkipReason.NoiseFile); + .ContainSingle(e => e.Name == "thumbs.db" && e.Reason == SkipReason.NoiseEntry); + } + + [Test] + public async Task Noise_Child_Directory_Is_Recorded_And_Not_Traversed() + { + var insp = new Mock(MockBehavior.Strict); + SetupDefaultClassification(insp); + insp.Setup(i => i.IsHidden(It.IsAny(), It.IsAny())).Returns(false); + insp.Setup(i => i.IsHidden(It.IsAny(), It.IsAny())).Returns(false); + insp.Setup(i => i.IsNoiseDirectoryName(It.Is(di => di.Name == "$RECYCLE.BIN"), It.IsAny())) + .Returns(true); + insp.Setup(i => i.IsNoiseFileName(It.IsAny(), It.IsAny())).Returns(false); + insp.Setup(i => i.IsSystemAttribute(It.IsAny())).Returns(false); + insp.Setup(i => i.IsReparsePoint(It.IsAny())).Returns(false); + insp.Setup(i => i.Exists(It.IsAny())).Returns(true); + insp.Setup(i => i.IsOffline(It.IsAny())).Returns(false); + insp.Setup(i => i.IsRecallOnDataAccess(It.IsAny())).Returns(false); + var (builder, processData) = CreateBuilderWithData(insp.Object); + + var root = Directory.CreateDirectory(Path.Combine(TestDirectory.FullName, "root_noise_dir")); + var visiblePath = Path.Combine(root.FullName, "visible.txt"); + await File.WriteAllTextAsync(visiblePath, "x"); + + var noiseDirectory = Directory.CreateDirectory(Path.Combine(root.FullName, "$RECYCLE.BIN")); + var nestedNoiseFile = Path.Combine(noiseDirectory.FullName, "nested.txt"); + await File.WriteAllTextAsync(nestedNoiseFile, "x"); + + builder.AddInventoryPart(root.FullName); + var invPath = Path.Combine(TestDirectory.FullName, "inv_noise_dir.zip"); + await builder.BuildBaseInventoryAsync(invPath); + + var part = builder.Inventory.InventoryParts.Single(); + part.FileDescriptions.Should().ContainSingle(fd => fd.Name == "visible.txt"); + part.FileDescriptions.Should().NotContain(fd => fd.Name == "nested.txt"); + + processData.SkippedEntries.Should() + .ContainSingle(e => e.Name == "$RECYCLE.BIN" && e.Reason == SkipReason.NoiseEntry); + } + + [Test] + public async Task Noise_Root_Directory_Is_Analyzed() + { + var insp = new Mock(MockBehavior.Strict); + SetupDefaultClassification(insp); + insp.Setup(i => i.IsHidden(It.IsAny(), It.IsAny())).Returns(false); + insp.Setup(i => i.IsHidden(It.IsAny(), It.IsAny())).Returns(false); + insp.Setup(i => i.IsNoiseFileName(It.IsAny(), It.IsAny())).Returns(false); + insp.Setup(i => i.IsSystemAttribute(It.IsAny())).Returns(false); + insp.Setup(i => i.IsReparsePoint(It.IsAny())).Returns(false); + insp.Setup(i => i.Exists(It.IsAny())).Returns(true); + insp.Setup(i => i.IsOffline(It.IsAny())).Returns(false); + insp.Setup(i => i.IsRecallOnDataAccess(It.IsAny())).Returns(false); + var (builder, processData) = CreateBuilderWithData(insp.Object); + + var noiseRoot = Directory.CreateDirectory(Path.Combine(TestDirectory.FullName, "$RECYCLE.BIN")); + var filePath = Path.Combine(noiseRoot.FullName, "inside.txt"); + await File.WriteAllTextAsync(filePath, "x"); + + builder.AddInventoryPart(noiseRoot.FullName); + var invPath = Path.Combine(TestDirectory.FullName, "inv_noise_root_dir.zip"); + await builder.BuildBaseInventoryAsync(invPath); + + var part = builder.Inventory.InventoryParts.Single(); + part.FileDescriptions.Should().ContainSingle(fd => fd.Name == "inside.txt"); + processData.SkippedEntries.Should().NotContain(e => e.Name == "$RECYCLE.BIN"); } [Test] diff --git a/tests/ByteSync.Client.UnitTests/Services/Inventories/NoiseFileDetectorTests.cs b/tests/ByteSync.Client.UnitTests/Services/Inventories/NoiseFileDetectorTests.cs index 80fb9d1e..21464abb 100644 --- a/tests/ByteSync.Client.UnitTests/Services/Inventories/NoiseFileDetectorTests.cs +++ b/tests/ByteSync.Client.UnitTests/Services/Inventories/NoiseFileDetectorTests.cs @@ -30,8 +30,11 @@ public void IsNoiseFileName_ShouldReturnTrue_ForKnownNoiseFiles_OnLinux(string f [TestCase("THUMBS.DB")] [TestCase("EHTHUMBS.DB")] [TestCase("EHTHUMBS_VISTA.DB")] + [TestCase("$recycle.bin")] [TestCase(".ds_store")] [TestCase(".appledouble")] + [TestCase(".appledb")] + [TestCase(".appledesktop")] [TestCase(".lsoverride")] [TestCase(".spotlight-v100")] [TestCase(".trashes")] @@ -52,8 +55,11 @@ public void IsNoiseFileName_ShouldBeCaseInsensitive_OnNonLinuxPlatforms(string f [TestCase("THUMBS.DB")] [TestCase("EHTHUMBS.DB")] [TestCase("EHTHUMBS_VISTA.DB")] + [TestCase("$recycle.bin")] [TestCase(".ds_store")] [TestCase(".appledouble")] + [TestCase(".appledb")] + [TestCase(".appledesktop")] [TestCase(".lsoverride")] [TestCase(".spotlight-v100")] [TestCase(".trashes")] @@ -108,4 +114,4 @@ private static string[] LoadNoiseFileNamesFromEmbeddedResource() return data!; } -} \ No newline at end of file +}