From 5b20635bf652cc0528f433221ebeb570a8889270 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 1 Apr 2026 02:43:09 +0000 Subject: [PATCH 1/2] Initial plan From b34223dac9ebc5742f6496c4c78e222c68b12226 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 1 Apr 2026 03:00:40 +0000 Subject: [PATCH 2/2] Use .gitignore-based filtering in _upload_folder_to_blob (pathspec) - Add pathspec>=0.9.0 to pyproject.toml dependencies - Add _load_gitignore_spec() and _is_ignored() module-level helpers in _patch_evaluators.py; reuse them in _patch_evaluators_async.py - Replace hard-coded skip_dirs/skip_extensions with gitignore-based filtering; .git is always excluded as a safety guard - Update and extend unit tests for both sync and async code paths Agent-Logs-Url: https://github.com/Azure/azure-sdk-for-python/sessions/7c2df3db-ebbe-4987-86ca-07b8339f05c5 Co-authored-by: w-javed <7674577+w-javed@users.noreply.github.com> --- .../aio/operations/_patch_evaluators_async.py | 30 ++-- .../projects/operations/_patch_evaluators.py | 71 ++++++-- sdk/ai/azure-ai-projects/pyproject.toml | 1 + .../evaluators/test_evaluators_upload.py | 153 ++++++++++++++++- .../test_evaluators_upload_async.py | 159 +++++++++++++++++- 5 files changed, 383 insertions(+), 31 deletions(-) diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py b/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py index c6c366fd5956..b829c6d9e1f1 100644 --- a/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py @@ -23,6 +23,7 @@ CodeBasedEvaluatorDefinition, EvaluatorVersion, ) +from ...operations._patch_evaluators import _GIT_DIR, _load_gitignore_spec, _is_ignored logger = logging.getLogger(__name__) @@ -47,8 +48,11 @@ async def _upload_folder_to_blob( ) -> None: """Walk *folder* and upload every eligible file to the blob container. - Skips ``__pycache__``, ``.git``, ``.venv``, ``venv``, ``node_modules`` - directories and ``.pyc`` / ``.pyo`` files. + Files and directories matching patterns in *folder/.gitignore* (or + *folder/.git/info/exclude*) are skipped. The ``.git`` directory is + always excluded as a safety measure regardless of ``.gitignore`` contents. + When no ``.gitignore`` is present, all files (except those inside + ``.git``) are uploaded. :param container_client: The blob container client to upload files to. :type container_client: ~azure.storage.blob.ContainerClient @@ -58,19 +62,25 @@ async def _upload_folder_to_blob( :raises HttpResponseError: Re-raised with a friendlier message on ``AuthorizationPermissionMismatch``. """ - skip_dirs = {"__pycache__", ".git", ".venv", "venv", "node_modules"} - skip_extensions = {".pyc", ".pyo"} + root_path = Path(folder) + spec = _load_gitignore_spec(root_path) files_uploaded = False for root, dirs, files in os.walk(folder): - dirs[:] = [d for d in dirs if d not in skip_dirs] + # Always skip .git; also skip any directory that matches .gitignore patterns. + dirs[:] = [ + d + for d in dirs + if d != _GIT_DIR and not _is_ignored(spec, root_path, Path(root) / d) + ] for file_name in files: - if any(file_name.endswith(ext) for ext in skip_extensions): + file_path = Path(root) / file_name + if _is_ignored(spec, root_path, file_path): continue - file_path = os.path.join(root, file_name) - blob_name = os.path.relpath(file_path, folder).replace("\\", "/") - logger.debug("[upload] Start uploading file `%s` as blob `%s`.", file_path, blob_name) - with open(file=file_path, mode="rb") as data: + file_path_str = str(file_path) + blob_name = os.path.relpath(file_path_str, folder).replace("\\", "/") + logger.debug("[upload] Start uploading file `%s` as blob `%s`.", file_path_str, blob_name) + with open(file=file_path_str, mode="rb") as data: try: await container_client.upload_blob(name=str(blob_name), data=data, **kwargs) except HttpResponseError as e: diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py b/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py index 3f1c38d97b2b..6adcfab4a487 100644 --- a/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py @@ -13,6 +13,7 @@ from typing import Any, Final, IO, Tuple, Optional, Union from pathlib import Path from urllib.parse import urlsplit +import pathspec from azure.storage.blob import ContainerClient from azure.core.tracing.decorator import distributed_trace from azure.core.exceptions import HttpResponseError, ResourceNotFoundError @@ -28,6 +29,47 @@ _EVALUATORS_FOUNDRY_FEATURES_VALUE: Final[str] = _FoundryFeaturesOptInKeys.EVALUATIONS_V1_PREVIEW.value +# The .git directory is always excluded from uploads regardless of .gitignore contents. +_GIT_DIR: Final[str] = ".git" + + +def _load_gitignore_spec(root: Path) -> pathspec.PathSpec: + """Load gitignore patterns from *root/.gitignore* (and *root/.git/info/exclude* if present). + + Returns a :class:`pathspec.PathSpec` that can be used to test whether a relative + path should be ignored. When neither file exists an empty spec is returned so that + nothing is filtered (except the hard-coded ``.git`` safety exclusion). + + :param root: The root folder whose ``.gitignore`` should be loaded. + :type root: pathlib.Path + :return: A compiled PathSpec for the discovered patterns. + :rtype: pathspec.PathSpec + """ + patterns: list = [] + gitignore = root / ".gitignore" + if gitignore.is_file(): + patterns.extend(gitignore.read_text(encoding="utf-8").splitlines()) + git_exclude = root / _GIT_DIR / "info" / "exclude" + if git_exclude.is_file(): + patterns.extend(git_exclude.read_text(encoding="utf-8").splitlines()) + return pathspec.PathSpec.from_lines("gitwildmatch", patterns) + + +def _is_ignored(spec: pathspec.PathSpec, root: Path, path: Path) -> bool: + """Return ``True`` if *path* matches any pattern in *spec* relative to *root*. + + :param spec: The compiled gitignore PathSpec. + :type spec: pathspec.PathSpec + :param root: The root folder used as the base for relative-path matching. + :type root: pathlib.Path + :param path: The absolute (or root-relative) path to test. + :type path: pathlib.Path + :return: ``True`` if the path is ignored, ``False`` otherwise. + :rtype: bool + """ + rel = path.relative_to(root).as_posix() + return spec.match_file(rel) + class BetaEvaluatorsOperations(BetaEvaluatorsOperationsGenerated): """ @@ -47,8 +89,11 @@ def _upload_folder_to_blob( ) -> None: """Walk *folder* and upload every eligible file to the blob container. - Skips ``__pycache__``, ``.git``, ``.venv``, ``venv``, ``node_modules`` - directories and ``.pyc`` / ``.pyo`` files. + Files and directories matching patterns in *folder/.gitignore* (or + *folder/.git/info/exclude*) are skipped. The ``.git`` directory is + always excluded as a safety measure regardless of ``.gitignore`` contents. + When no ``.gitignore`` is present, all files (except those inside + ``.git``) are uploaded. :param container_client: The blob container client to upload files to. :type container_client: ~azure.storage.blob.ContainerClient @@ -58,19 +103,25 @@ def _upload_folder_to_blob( :raises HttpResponseError: Re-raised with a friendlier message on ``AuthorizationPermissionMismatch``. """ - skip_dirs = {"__pycache__", ".git", ".venv", "venv", "node_modules"} - skip_extensions = {".pyc", ".pyo"} + root_path = Path(folder) + spec = _load_gitignore_spec(root_path) files_uploaded = False for root, dirs, files in os.walk(folder): - dirs[:] = [d for d in dirs if d not in skip_dirs] + # Always skip .git; also skip any directory that matches .gitignore patterns. + dirs[:] = [ + d + for d in dirs + if d != _GIT_DIR and not _is_ignored(spec, root_path, Path(root) / d) + ] for file_name in files: - if any(file_name.endswith(ext) for ext in skip_extensions): + file_path = Path(root) / file_name + if _is_ignored(spec, root_path, file_path): continue - file_path = os.path.join(root, file_name) - blob_name = os.path.relpath(file_path, folder).replace("\\", "/") - logger.debug("[upload] Start uploading file `%s` as blob `%s`.", file_path, blob_name) - with open(file=file_path, mode="rb") as data: + file_path_str = str(file_path) + blob_name = os.path.relpath(file_path_str, folder).replace("\\", "/") + logger.debug("[upload] Start uploading file `%s` as blob `%s`.", file_path_str, blob_name) + with open(file=file_path_str, mode="rb") as data: try: container_client.upload_blob(name=str(blob_name), data=data, **kwargs) except HttpResponseError as e: diff --git a/sdk/ai/azure-ai-projects/pyproject.toml b/sdk/ai/azure-ai-projects/pyproject.toml index 26eac48e1123..7b950dccf56f 100644 --- a/sdk/ai/azure-ai-projects/pyproject.toml +++ b/sdk/ai/azure-ai-projects/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "azure-identity>=1.15.0", "openai>=2.8.0", "azure-storage-blob>=12.15.0", + "pathspec>=0.9.0", ] dynamic = [ "version", "readme" diff --git a/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload.py b/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload.py index 09e2279b2701..91b4084f689e 100644 --- a/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload.py +++ b/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload.py @@ -267,6 +267,7 @@ def test_upload_skips_pycache_and_pyc_files(self): "__pycache__/evaluator.cpython-312.pyc": b"compiled", "other.pyc": b"compiled", "other.pyo": b"optimized", + ".gitignore": b"__pycache__/\n*.pyc\n*.pyo\n", } ) @@ -282,12 +283,152 @@ def test_upload_skips_pycache_and_pyc_files(self): folder=folder, ) - # Only evaluator.py should be uploaded - assert mock_container.upload_blob.call_count == 1 - blob_name = mock_container.upload_blob.call_args.kwargs.get("name") or mock_container.upload_blob.call_args[ - 1 - ].get("name") - assert blob_name == "evaluator.py" + # evaluator.py and .gitignore should be uploaded; pycache and pyc/pyo files skipped + uploaded_names = sorted( + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ) + assert ".gitignore" in uploaded_names + assert "evaluator.py" in uploaded_names + assert not any(n.endswith(".pyc") or n.endswith(".pyo") or "__pycache__" in n for n in uploaded_names) + + # --------------------------------------------------------------- + # upload() - gitignore-based filtering tests + # --------------------------------------------------------------- + + def test_upload_respects_gitignore_ignored_directory(self): + """Directories listed in .gitignore should not be traversed.""" + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder( + { + "evaluator.py": b"class Eval: pass", + ".gitignore": b".venv/\nnode_modules/\n", + ".venv/lib/site-packages/some_lib.py": b"ignored", + "node_modules/pkg/index.js": b"ignored", + } + ) + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + uploaded_names = [ + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ] + # Ignored directory contents must not be uploaded + assert not any(".venv" in n or "node_modules" in n for n in uploaded_names) + assert "evaluator.py" in uploaded_names + + def test_upload_respects_gitignore_ignored_file(self): + """Files matching .gitignore patterns should not be uploaded.""" + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder( + { + "evaluator.py": b"class Eval: pass", + ".gitignore": b"*.log\nsecrets.env\n", + "debug.log": b"log content", + "secrets.env": b"SECRET=abc", + } + ) + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + uploaded_names = [ + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ] + assert "evaluator.py" in uploaded_names + assert "debug.log" not in uploaded_names + assert "secrets.env" not in uploaded_names + + def test_upload_uploads_all_files_when_no_gitignore(self): + """When .gitignore is absent all files (except inside .git) should be uploaded.""" + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder( + { + "evaluator.py": b"class Eval: pass", + "requirements.txt": b"azure-ai-projects\n", + "utils/helper.py": b"def helper(): pass", + } + ) + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + uploaded_names = sorted( + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ) + assert uploaded_names == sorted(["evaluator.py", "requirements.txt", "utils/helper.py"]) + + def test_upload_always_skips_git_dir(self): + """.git directory must never be uploaded, even without a .gitignore.""" + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder( + { + "evaluator.py": b"class Eval: pass", + ".git/config": b"[core]\n bare = false\n", + ".git/HEAD": b"ref: refs/heads/main\n", + } + ) + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + uploaded_names = [ + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ] + assert not any(".git" in n for n in uploaded_names) + assert "evaluator.py" in uploaded_names # --------------------------------------------------------------- # upload() - blob_uri set on evaluator version tests diff --git a/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload_async.py b/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload_async.py index 5417331b6585..740f402fa458 100644 --- a/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload_async.py +++ b/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload_async.py @@ -299,6 +299,7 @@ async def test_upload_skips_pycache_and_pyc_files(self): "__pycache__/evaluator.cpython-312.pyc": b"compiled", "other.pyc": b"compiled", "other.pyo": b"optimized", + ".gitignore": b"__pycache__/\n*.pyc\n*.pyo\n", } ) @@ -314,11 +315,159 @@ async def test_upload_skips_pycache_and_pyc_files(self): folder=folder, ) - assert mock_container.upload_blob.call_count == 1 - blob_name = mock_container.upload_blob.call_args.kwargs.get("name") or mock_container.upload_blob.call_args[ - 1 - ].get("name") - assert blob_name == "evaluator.py" + # evaluator.py and .gitignore should be uploaded; pycache and pyc/pyo files skipped + uploaded_names = sorted( + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ) + assert ".gitignore" in uploaded_names + assert "evaluator.py" in uploaded_names + assert not any(n.endswith(".pyc") or n.endswith(".pyo") or "__pycache__" in n for n in uploaded_names) + + # --------------------------------------------------------------- + # upload() - gitignore-based filtering tests + # --------------------------------------------------------------- + + @pytest.mark.asyncio + async def test_upload_respects_gitignore_ignored_directory(self): + """Directories listed in .gitignore should not be traversed.""" + ops = self._create_operations() + + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder( + { + "evaluator.py": b"class Eval: pass", + ".gitignore": b".venv/\nnode_modules/\n", + ".venv/lib/site-packages/some_lib.py": b"ignored", + "node_modules/pkg/index.js": b"ignored", + } + ) + + with patch("azure.ai.projects.aio.operations._patch_evaluators_async.ContainerClient") as MockContainerClient: + mock_container = AsyncMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__aenter__ = AsyncMock(return_value=mock_container) + mock_container.__aexit__ = AsyncMock(return_value=False) + + await ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + uploaded_names = [ + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ] + assert not any(".venv" in n or "node_modules" in n for n in uploaded_names) + assert "evaluator.py" in uploaded_names + + @pytest.mark.asyncio + async def test_upload_respects_gitignore_ignored_file(self): + """Files matching .gitignore patterns should not be uploaded.""" + ops = self._create_operations() + + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder( + { + "evaluator.py": b"class Eval: pass", + ".gitignore": b"*.log\nsecrets.env\n", + "debug.log": b"log content", + "secrets.env": b"SECRET=abc", + } + ) + + with patch("azure.ai.projects.aio.operations._patch_evaluators_async.ContainerClient") as MockContainerClient: + mock_container = AsyncMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__aenter__ = AsyncMock(return_value=mock_container) + mock_container.__aexit__ = AsyncMock(return_value=False) + + await ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + uploaded_names = [ + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ] + assert "evaluator.py" in uploaded_names + assert "debug.log" not in uploaded_names + assert "secrets.env" not in uploaded_names + + @pytest.mark.asyncio + async def test_upload_uploads_all_files_when_no_gitignore(self): + """When .gitignore is absent all files (except inside .git) should be uploaded.""" + ops = self._create_operations() + + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder( + { + "evaluator.py": b"class Eval: pass", + "requirements.txt": b"azure-ai-projects\n", + "utils/helper.py": b"def helper(): pass", + } + ) + + with patch("azure.ai.projects.aio.operations._patch_evaluators_async.ContainerClient") as MockContainerClient: + mock_container = AsyncMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__aenter__ = AsyncMock(return_value=mock_container) + mock_container.__aexit__ = AsyncMock(return_value=False) + + await ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + uploaded_names = sorted( + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ) + assert uploaded_names == sorted(["evaluator.py", "requirements.txt", "utils/helper.py"]) + + @pytest.mark.asyncio + async def test_upload_always_skips_git_dir(self): + """.git directory must never be uploaded, even without a .gitignore.""" + ops = self._create_operations() + + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder( + { + "evaluator.py": b"class Eval: pass", + ".git/config": b"[core]\n bare = false\n", + ".git/HEAD": b"ref: refs/heads/main\n", + } + ) + + with patch("azure.ai.projects.aio.operations._patch_evaluators_async.ContainerClient") as MockContainerClient: + mock_container = AsyncMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__aenter__ = AsyncMock(return_value=mock_container) + mock_container.__aexit__ = AsyncMock(return_value=False) + + await ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + uploaded_names = [ + c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list + ] + assert not any(".git" in n for n in uploaded_names) + assert "evaluator.py" in uploaded_names # --------------------------------------------------------------- # upload() - blob_uri set on evaluator version tests