Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
CodeBasedEvaluatorDefinition,
EvaluatorVersion,
)
from ...operations._patch_evaluators import _GIT_DIR, _load_gitignore_spec, _is_ignored

logger = logging.getLogger(__name__)

Expand All @@ -47,8 +48,11 @@ async def _upload_folder_to_blob(
) -> None:
"""Walk *folder* and upload every eligible file to the blob container.

Skips ``__pycache__``, ``.git``, ``.venv``, ``venv``, ``node_modules``
directories and ``.pyc`` / ``.pyo`` files.
Files and directories matching patterns in *folder/.gitignore* (or
*folder/.git/info/exclude*) are skipped. The ``.git`` directory is
always excluded as a safety measure regardless of ``.gitignore`` contents.
When no ``.gitignore`` is present, all files (except those inside
``.git``) are uploaded.

:param container_client: The blob container client to upload files to.
:type container_client: ~azure.storage.blob.ContainerClient
Expand All @@ -58,19 +62,25 @@ async def _upload_folder_to_blob(
:raises HttpResponseError: Re-raised with a friendlier message on
``AuthorizationPermissionMismatch``.
"""
skip_dirs = {"__pycache__", ".git", ".venv", "venv", "node_modules"}
skip_extensions = {".pyc", ".pyo"}
root_path = Path(folder)
spec = _load_gitignore_spec(root_path)
files_uploaded = False

for root, dirs, files in os.walk(folder):
dirs[:] = [d for d in dirs if d not in skip_dirs]
# Always skip .git; also skip any directory that matches .gitignore patterns.
dirs[:] = [
d
for d in dirs
if d != _GIT_DIR and not _is_ignored(spec, root_path, Path(root) / d)
]
for file_name in files:
if any(file_name.endswith(ext) for ext in skip_extensions):
file_path = Path(root) / file_name
if _is_ignored(spec, root_path, file_path):
continue
file_path = os.path.join(root, file_name)
blob_name = os.path.relpath(file_path, folder).replace("\\", "/")
logger.debug("[upload] Start uploading file `%s` as blob `%s`.", file_path, blob_name)
with open(file=file_path, mode="rb") as data:
file_path_str = str(file_path)
blob_name = os.path.relpath(file_path_str, folder).replace("\\", "/")
logger.debug("[upload] Start uploading file `%s` as blob `%s`.", file_path_str, blob_name)
with open(file=file_path_str, mode="rb") as data:
try:
await container_client.upload_blob(name=str(blob_name), data=data, **kwargs)
except HttpResponseError as e:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from typing import Any, Final, IO, Tuple, Optional, Union
from pathlib import Path
from urllib.parse import urlsplit
import pathspec
from azure.storage.blob import ContainerClient
from azure.core.tracing.decorator import distributed_trace
from azure.core.exceptions import HttpResponseError, ResourceNotFoundError
Expand All @@ -28,6 +29,47 @@

_EVALUATORS_FOUNDRY_FEATURES_VALUE: Final[str] = _FoundryFeaturesOptInKeys.EVALUATIONS_V1_PREVIEW.value

# The .git directory is always excluded from uploads regardless of .gitignore contents.
_GIT_DIR: Final[str] = ".git"


def _load_gitignore_spec(root: Path) -> pathspec.PathSpec:
    """Load gitignore patterns from *root/.git/info/exclude* and *root/.gitignore*.

    Returns a :class:`pathspec.PathSpec` that can be used to test whether a relative
    path should be ignored. When neither file exists an empty spec is returned so that
    nothing is filtered (except the hard-coded ``.git`` safety exclusion applied by callers).

    Patterns from ``.git/info/exclude`` are loaded *before* those from ``.gitignore``.
    The last matching pattern decides the outcome, so this ordering lets ``.gitignore``
    override ``info/exclude``, which is the precedence git itself documents.

    :param root: The root folder whose ``.gitignore`` should be loaded.
    :type root: pathlib.Path
    :return: A compiled PathSpec for the discovered patterns.
    :rtype: pathspec.PathSpec
    """
    # Ordered from lowest to highest precedence (see docstring).
    pattern_files = (
        root / _GIT_DIR / "info" / "exclude",
        root / ".gitignore",
    )
    patterns: list = []
    for pattern_file in pattern_files:
        if pattern_file.is_file():
            # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM, which
            # would otherwise become part of the first pattern and stop it matching.
            patterns.extend(pattern_file.read_text(encoding="utf-8-sig").splitlines())
    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)


def _is_ignored(spec: pathspec.PathSpec, root: Path, path: Path) -> bool:
    """Return ``True`` if *path* matches any pattern in *spec* relative to *root*.

    Directories are matched with a trailing ``/`` appended to their relative path.
    Gitignore patterns that end in ``/`` (for example ``node_modules/``) apply to
    directories only; without the trailing separator such a pattern would match the
    files inside the directory but never the directory itself, so a tree walk could
    not prune it.

    :param spec: The compiled gitignore PathSpec.
    :type spec: pathspec.PathSpec
    :param root: The root folder used as the base for relative-path matching.
    :type root: pathlib.Path
    :param path: The path to test. It must be located under *root*.
    :type path: pathlib.Path
    :return: ``True`` if the path is ignored, ``False`` otherwise.
    :rtype: bool
    :raises ValueError: If *path* is not located under *root*.
    """
    rel = path.relative_to(root).as_posix()
    if path.is_dir():
        # Mark the path as a directory so directory-only patterns can match it.
        rel += "/"
    return spec.match_file(rel)


class BetaEvaluatorsOperations(BetaEvaluatorsOperationsGenerated):
"""
Expand All @@ -47,8 +89,11 @@ def _upload_folder_to_blob(
) -> None:
"""Walk *folder* and upload every eligible file to the blob container.

Skips ``__pycache__``, ``.git``, ``.venv``, ``venv``, ``node_modules``
directories and ``.pyc`` / ``.pyo`` files.
Files and directories matching patterns in *folder/.gitignore* (or
*folder/.git/info/exclude*) are skipped. The ``.git`` directory is
always excluded as a safety measure regardless of ``.gitignore`` contents.
When no ``.gitignore`` is present, all files (except those inside
``.git``) are uploaded.

:param container_client: The blob container client to upload files to.
:type container_client: ~azure.storage.blob.ContainerClient
Expand All @@ -58,19 +103,25 @@ def _upload_folder_to_blob(
:raises HttpResponseError: Re-raised with a friendlier message on
``AuthorizationPermissionMismatch``.
"""
skip_dirs = {"__pycache__", ".git", ".venv", "venv", "node_modules"}
skip_extensions = {".pyc", ".pyo"}
root_path = Path(folder)
spec = _load_gitignore_spec(root_path)
files_uploaded = False

for root, dirs, files in os.walk(folder):
dirs[:] = [d for d in dirs if d not in skip_dirs]
# Always skip .git; also skip any directory that matches .gitignore patterns.
dirs[:] = [
d
for d in dirs
if d != _GIT_DIR and not _is_ignored(spec, root_path, Path(root) / d)
]
for file_name in files:
if any(file_name.endswith(ext) for ext in skip_extensions):
file_path = Path(root) / file_name
if _is_ignored(spec, root_path, file_path):
continue
file_path = os.path.join(root, file_name)
blob_name = os.path.relpath(file_path, folder).replace("\\", "/")
logger.debug("[upload] Start uploading file `%s` as blob `%s`.", file_path, blob_name)
with open(file=file_path, mode="rb") as data:
file_path_str = str(file_path)
blob_name = os.path.relpath(file_path_str, folder).replace("\\", "/")
logger.debug("[upload] Start uploading file `%s` as blob `%s`.", file_path_str, blob_name)
with open(file=file_path_str, mode="rb") as data:
try:
container_client.upload_blob(name=str(blob_name), data=data, **kwargs)
except HttpResponseError as e:
Expand Down
1 change: 1 addition & 0 deletions sdk/ai/azure-ai-projects/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ dependencies = [
"azure-identity>=1.15.0",
"openai>=2.8.0",
"azure-storage-blob>=12.15.0",
"pathspec>=0.9.0",
]
dynamic = [
"version", "readme"
Expand Down
153 changes: 147 additions & 6 deletions sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def test_upload_skips_pycache_and_pyc_files(self):
"__pycache__/evaluator.cpython-312.pyc": b"compiled",
"other.pyc": b"compiled",
"other.pyo": b"optimized",
".gitignore": b"__pycache__/\n*.pyc\n*.pyo\n",
}
)

Expand All @@ -282,12 +283,152 @@ def test_upload_skips_pycache_and_pyc_files(self):
folder=folder,
)

# Only evaluator.py should be uploaded
assert mock_container.upload_blob.call_count == 1
blob_name = mock_container.upload_blob.call_args.kwargs.get("name") or mock_container.upload_blob.call_args[
1
].get("name")
assert blob_name == "evaluator.py"
# evaluator.py and .gitignore should be uploaded; pycache and pyc/pyo files skipped
uploaded_names = sorted(
c.kwargs.get("name") or c[1].get("name") for c in mock_container.upload_blob.call_args_list
)
assert ".gitignore" in uploaded_names
assert "evaluator.py" in uploaded_names
assert not any(n.endswith(".pyc") or n.endswith(".pyo") or "__pycache__" in n for n in uploaded_names)

# ---------------------------------------------------------------
# upload() - gitignore-based filtering tests
# ---------------------------------------------------------------

def test_upload_respects_gitignore_ignored_directory(self):
    """Nothing under a directory named in .gitignore may end up as an uploaded blob."""
    ops = self._create_operations()
    ops.list_versions.side_effect = ResourceNotFoundError("Not found")
    ops.pending_upload.return_value = self._mock_pending_upload_response()
    ops.create_version.return_value = {"name": "test", "version": "1"}

    # Two ignored directories next to a single file that must be uploaded.
    tree = {
        "evaluator.py": b"class Eval: pass",
        ".gitignore": b".venv/\nnode_modules/\n",
        ".venv/lib/site-packages/some_lib.py": b"ignored",
        "node_modules/pkg/index.js": b"ignored",
    }
    folder = self._create_temp_folder(tree)

    with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as container_cls:
        container = MagicMock()
        container_cls.from_container_url.return_value = container
        # The container client is used as a context manager by the code under test.
        container.__enter__ = MagicMock(return_value=container)
        container.__exit__ = MagicMock(return_value=False)

        ops.upload(name="test", evaluator_version={"definition": {}}, folder=folder)

    blob_names = []
    for recorded in container.upload_blob.call_args_list:
        blob_names.append(recorded.kwargs.get("name") or recorded[1].get("name"))

    # No blob name may mention either ignored directory.
    assert all(".venv" not in blob and "node_modules" not in blob for blob in blob_names)
    assert "evaluator.py" in blob_names

def test_upload_respects_gitignore_ignored_file(self):
    """Files matched by a .gitignore pattern (glob or exact name) are not uploaded."""
    ops = self._create_operations()
    ops.list_versions.side_effect = ResourceNotFoundError("Not found")
    ops.pending_upload.return_value = self._mock_pending_upload_response()
    ops.create_version.return_value = {"name": "test", "version": "1"}

    # One glob pattern (*.log) and one literal file name (secrets.env).
    tree = {
        "evaluator.py": b"class Eval: pass",
        ".gitignore": b"*.log\nsecrets.env\n",
        "debug.log": b"log content",
        "secrets.env": b"SECRET=abc",
    }
    folder = self._create_temp_folder(tree)

    with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as container_cls:
        container = MagicMock()
        container_cls.from_container_url.return_value = container
        # The container client is used as a context manager by the code under test.
        container.__enter__ = MagicMock(return_value=container)
        container.__exit__ = MagicMock(return_value=False)

        ops.upload(name="test", evaluator_version={"definition": {}}, folder=folder)

    blob_names = []
    for recorded in container.upload_blob.call_args_list:
        blob_names.append(recorded.kwargs.get("name") or recorded[1].get("name"))

    assert "evaluator.py" in blob_names
    for ignored in ("debug.log", "secrets.env"):
        assert ignored not in blob_names

def test_upload_uploads_all_files_when_no_gitignore(self):
    """Without a .gitignore every file in the folder, nested ones included, is uploaded."""
    ops = self._create_operations()
    ops.list_versions.side_effect = ResourceNotFoundError("Not found")
    ops.pending_upload.return_value = self._mock_pending_upload_response()
    ops.create_version.return_value = {"name": "test", "version": "1"}

    tree = {
        "evaluator.py": b"class Eval: pass",
        "requirements.txt": b"azure-ai-projects\n",
        "utils/helper.py": b"def helper(): pass",
    }
    folder = self._create_temp_folder(tree)

    with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as container_cls:
        container = MagicMock()
        container_cls.from_container_url.return_value = container
        # The container client is used as a context manager by the code under test.
        container.__enter__ = MagicMock(return_value=container)
        container.__exit__ = MagicMock(return_value=False)

        ops.upload(name="test", evaluator_version={"definition": {}}, folder=folder)

    blob_names = [
        recorded.kwargs.get("name") or recorded[1].get("name")
        for recorded in container.upload_blob.call_args_list
    ]
    # Upload order is not part of the contract, so compare in sorted order.
    blob_names.sort()
    expected = ["evaluator.py", "requirements.txt", "utils/helper.py"]
    expected.sort()
    assert blob_names == expected

def test_upload_always_skips_git_dir(self):
    """Contents of .git are never uploaded, even when the folder has no .gitignore."""
    ops = self._create_operations()
    ops.list_versions.side_effect = ResourceNotFoundError("Not found")
    ops.pending_upload.return_value = self._mock_pending_upload_response()
    ops.create_version.return_value = {"name": "test", "version": "1"}

    # No .gitignore here: the .git exclusion must not depend on one.
    tree = {
        "evaluator.py": b"class Eval: pass",
        ".git/config": b"[core]\n bare = false\n",
        ".git/HEAD": b"ref: refs/heads/main\n",
    }
    folder = self._create_temp_folder(tree)

    with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as container_cls:
        container = MagicMock()
        container_cls.from_container_url.return_value = container
        # The container client is used as a context manager by the code under test.
        container.__enter__ = MagicMock(return_value=container)
        container.__exit__ = MagicMock(return_value=False)

        ops.upload(name="test", evaluator_version={"definition": {}}, folder=folder)

    blob_names = []
    for recorded in container.upload_blob.call_args_list:
        blob_names.append(recorded.kwargs.get("name") or recorded[1].get("name"))

    assert all(".git" not in blob for blob in blob_names)
    assert "evaluator.py" in blob_names

# ---------------------------------------------------------------
# upload() - blob_uri set on evaluator version tests
Expand Down
Loading
Loading