Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion providers/git/src/airflow/providers/git/bundles/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,16 +123,41 @@ def _is_pruned_worktree(self) -> bool:
return False
return not (self.repo_path / ".git").exists()

def _local_repo_has_version(self) -> bool:
"""Check if the local repo already has the correct version checked out."""
if not self.version or not self.repo_path.is_dir() or not (self.repo_path / ".git").exists():
return False
repo = None
try:
repo = Repo(self.repo_path)
expected_commit = repo.commit(self.version)
has_version = repo.head.commit.hexsha == expected_commit.hexsha
return has_version
except (InvalidGitRepositoryError, NoSuchPathError, BadName, GitCommandError, ValueError):
return False
Comment thread
ronaldorcampos marked this conversation as resolved.
finally:
if repo is not None:
repo.close()

def _initialize(self):
with self.lock():
# Avoids re-cloning on every task run when prune_dotgit_folder=True.
# Avoids re-cloning on every task run when:
# 1. A pruned worktree already exists (prune_dotgit_folder=True)
# 2. The local repo already has the expected version
if self._is_pruned_worktree():
self._log.debug(
"Using existing pruned worktree",
repo_path=self.repo_path,
version=self.version,
)
return
if self._local_repo_has_version():
self._log.debug(
"Using existing local repo with correct version",
repo_path=self.repo_path,
version=self.version,
)
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When _local_repo_has_version() is true, _initialize() returns early without setting self.repo. For versioned bundles with prune_dotgit_folder=False, the normal path sets self.repo = Repo(self.repo_path) and get_current_version() then returns HEAD’s full SHA; with this early-return path get_current_version() will instead return the raw self.version string (e.g. a tag/short SHA), which is an observable behavior change compared to a first-time initialize. Consider opening/assigning self.repo (and ensuring it’s closed) before returning, or adjusting get_current_version() to resolve HEAD when .git exists even if self.repo isn’t set; adding a regression test for the tag/short-SHA case would prevent this from reappearing.

Suggested change
)
)
if not self.prune_dotgit_folder and self.repo is None:
try:
self.repo = Repo(self.repo_path)
except InvalidGitRepositoryError as e:
raise RuntimeError(f"Invalid git repository at {self.repo_path}") from e
if self.repo is not None:
self.repo.close()

Copilot uses AI. Check for mistakes.
return

cm = self.hook.configure_hook_env() if self.hook else nullcontext()
with cm:
Expand Down
113 changes: 113 additions & 0 deletions providers/git/tests/unit/git/bundles/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,119 @@ def test_second_initialize_reuses_pruned_worktree_without_recloning(self, mock_g
files_in_repo = {f.name for f in bundle2.path.iterdir() if f.is_file()}
assert {"test_dag.py"} == files_in_repo

@mock.patch("airflow.providers.git.bundles.git.GitHook")
def test_second_initialize_skips_clone_when_local_repo_has_version(self, mock_githook, git_repo):
"""When the local repo already has the correct version checked out (with .git intact), skip re-cloning."""
repo_path, repo = git_repo
mock_githook.return_value.repo_url = repo_path
version = repo.head.commit.hexsha
bundle_name = "test_version_reuse"

# Clone with prune_dotgit_folder=False so .git is preserved
bundle1 = GitDagBundle(
name=bundle_name,
git_conn_id=CONN_HTTPS,
version=version,
tracking_ref=GIT_DEFAULT_BRANCH,
prune_dotgit_folder=False,
)
bundle1.initialize()
assert (bundle1.repo_path / ".git").exists()
assert bundle1.get_current_version() == version

# Should detect local repo has correct version and skip clone
with (
patch.object(GitDagBundle, "_clone_bare_repo_if_required") as mock_bare_clone,
patch.object(GitDagBundle, "_clone_repo_if_required") as mock_clone,
):
bundle2 = GitDagBundle(
name=bundle_name,
git_conn_id=CONN_HTTPS,
version=version,
tracking_ref=GIT_DEFAULT_BRANCH,
prune_dotgit_folder=False,
)
bundle2.initialize()
mock_bare_clone.assert_not_called()
mock_clone.assert_not_called()

@mock.patch("airflow.providers.git.bundles.git.GitHook")
def test_initialize_with_different_versions_creates_separate_repos(self, mock_githook, git_repo):
"""Initializing the same bundle with different versions creates separate repos, each at the requested version."""
repo_path, repo = git_repo
mock_githook.return_value.repo_url = repo_path
first_commit = repo.head.commit.hexsha

# Add a second commit
file_path = repo_path / "new_file.py"
with open(file_path, "w") as f:
f.write("new content")
repo.index.add([file_path])
repo.index.commit("Second commit")
second_commit = repo.head.commit.hexsha

bundle_name = "test_version_mismatch"

# First init: clone at second_commit
bundle1 = GitDagBundle(
name=bundle_name,
git_conn_id=CONN_HTTPS,
version=second_commit,
tracking_ref=GIT_DEFAULT_BRANCH,
prune_dotgit_folder=False,
)
bundle1.initialize()
assert bundle1.get_current_version() == second_commit

# Second init with first_commit: different version means different repo_path
bundle2 = GitDagBundle(
name=bundle_name,
git_conn_id=CONN_HTTPS,
version=first_commit,
tracking_ref=GIT_DEFAULT_BRANCH,
prune_dotgit_folder=False,
)
bundle2.initialize()
assert bundle2.get_current_version() == first_commit
assert bundle1.repo_path != bundle2.repo_path

@mock.patch("airflow.providers.git.bundles.git.GitHook")
def test_local_repo_has_version_returns_false_when_head_mismatches(self, mock_githook, git_repo):
"""When the local repo exists at the version path but HEAD doesn't match, _local_repo_has_version returns False."""
repo_path, repo = git_repo
mock_githook.return_value.repo_url = repo_path
first_commit = repo.head.commit.hexsha

# Add a second commit
file_path = repo_path / "new_file.py"
with open(file_path, "w") as f:
f.write("new content")
repo.index.add([file_path])
repo.index.commit("Second commit")
second_commit = repo.head.commit.hexsha

bundle_name = "test_wrong_head"

# Clone at second_commit with .git preserved
bundle = GitDagBundle(
name=bundle_name,
git_conn_id=CONN_HTTPS,
version=second_commit,
tracking_ref=GIT_DEFAULT_BRANCH,
prune_dotgit_folder=False,
)
bundle.initialize()
assert bundle.get_current_version() == second_commit
assert bundle._local_repo_has_version() is True

# Mutate the cloned repo's HEAD to point at first_commit so HEAD != version
cloned_repo = Repo(bundle.repo_path)
cloned_repo.head.reset(first_commit, index=True, working_tree=True)
cloned_repo.close()

# Same bundle config but now HEAD doesn't match version — should return False
assert bundle._local_repo_has_version() is False

@pytest.mark.parametrize(
"amend",
[
Expand Down
Loading