-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathgit_bdiff.py
More file actions
200 lines (156 loc) · 6.13 KB
/
git_bdiff.py
File metadata and controls
200 lines (156 loc) · 6.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python3
# *********************************COPYRIGHT************************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file COPYRIGHT.txt
# which you should have received as part of this distribution.
# *********************************COPYRIGHT************************************
"""
Module to obtain a list of all altered files on a git branch, from
the point where it diverged from the parent branch to the most recent
commit.
Usage is as follows:
>>> bdiff = GitBDiff()
And then:
>>> for change in bdiff.files():
... print(change)
"""
import re
import subprocess
from pathlib import Path
class GitBDiffError(Exception):
    """Common base class for every error raised by the bdiff code."""
class GitBDiffNotGit(GitBDiffError):
    """Raised when the target is not inside a git repository."""

    def __init__(self, cmd):
        """Build the error message from the git command that failed.

        ``cmd`` is the command as a sequence of arguments; each element
        is converted with ``str`` before being joined with spaces.
        """
        command_text = " ".join(map(str, cmd))
        super().__init__(f"not a repository (cmd:{command_text})")
class GitBase:
    """
    Base class for gitbdiff functionality.

    Stores the optional repository directory and provides helpers to
    run git commands in it and to look up the current branch name.
    """

    # Match branch names.  This should catch all valid names but may
    # also let some invalid names through.  This should not matter
    # given that it is being used to match git command output.  For a
    # complete overview of the naming scheme, see man git
    # check-ref-format.
    #
    # The captured name may be a single character, never ends in a dot
    # and never includes surrounding whitespace (so a stray "\r" from
    # CRLF output is not captured as part of the name).
    _branch_pattern = re.compile(r"^\s*([^\s~\^\:\?\*\[]*[^\s~\^\:\?\*\[.])\s*$")

    # Text returned if in a detached head
    # NOTE(review): the value is misspelled ("detched"); it is left
    # unchanged in case callers compare against the literal string.
    detached_head_reference = "detched_head_state"

    def __init__(self, parent=None, repo=None):
        """Record the repository directory to run git commands in.

        ``parent`` is not used by this class; it is accepted so that
        subclasses can pass it through.  ``repo`` is the directory in
        which git is run; when it is None no working directory is
        passed to the git subprocess.

        Raises GitBDiffError if ``repo`` is given but is not a
        directory.
        """
        if repo is None:
            self._repo = None
        else:
            self._repo = Path(repo)
            if not self._repo.is_dir():
                raise GitBDiffError(f"{repo} is not a directory")

    def get_branch_name(self):
        """Get the name of the current branch.

        Returns the first line of ``git branch --show-current`` that
        looks like a branch name.  If there is none, returns
        ``detached_head_reference`` when ``git branch`` reports a
        detached HEAD.

        Raises GitBDiffError if neither command yields a result.
        """
        for line in self.run_git(["branch", "--show-current"]):
            if m := self._branch_pattern.match(line):
                return m.group(1)

        # No branch name was reported: check for a Detached Head state
        for line in self.run_git(["branch"]):
            if "HEAD detached" in line:
                return self.detached_head_reference

        raise GitBDiffError("unable to get branch name")

    def run_git(self, args):
        """Run a git command and yield the output line by line.

        ``args`` must be a list of arguments to follow ``git`` on the
        command line.  This is a generator, so nothing happens (not
        even the argument check) until the result is iterated.

        Raises TypeError if ``args`` is not a list, GitBDiffNotGit if
        git reports that it is not in a repository, and GitBDiffError
        for any other "fatal: " message or a non-zero return code.
        """
        if not isinstance(args, list):
            raise TypeError("args must be a list")
        cmd = ["git"] + args

        # Run the command in the repo directory, capture the output,
        # and check for errors.  The built-in error check is not used
        # to allow specific git errors to be treated more precisely
        proc = subprocess.run(
            cmd, capture_output=True, check=False, shell=False, cwd=self._repo
        )

        for line in proc.stderr.decode("utf-8").split("\n"):
            if line.startswith("fatal: not a git repository"):
                raise GitBDiffNotGit(cmd)
            if line.startswith("fatal: "):
                # Strip the "fatal: " prefix from the reported message
                raise GitBDiffError(line[7:])

        if proc.returncode != 0:
            raise GitBDiffError(f"command returned {proc.returncode}")

        yield from proc.stdout.decode("utf-8").split("\n")
class GitBDiff(GitBase):
    """Class which generates a branch diff."""

    # Name of primary branch - default is main
    primary_branch = "main"

    # Match full hex commit IDs: 40 digits for SHA-1 repositories or
    # 64 digits for repositories using the SHA-256 object format
    _hash_pattern = re.compile(r"^\s*([0-9a-f]{40}(?:[0-9a-f]{24})?)\s*$")

    def __init__(self, parent=None, repo=None):
        """Collect the branch point, latest commit and branch name.

        ``parent`` is the branch to compare against and defaults to
        ``primary_branch``.  ``repo`` is the repository directory
        passed on to the base class.

        Raises GitBDiffError if the repository is in a detached head
        state, or if any of the underlying git queries fail.
        """
        self.parent = parent or self.primary_branch
        super().__init__(parent, repo)
        self.ancestor = self.get_branch_point()
        self.current = self.get_latest_commit()
        self.branch = self.get_branch_name()
        if self.branch == self.detached_head_reference:
            raise GitBDiffError("Can't get a diff for a repo in detached head state")

    def _first_hash(self, args, error):
        """Return the first commit ID printed by ``git`` run with args.

        Raises GitBDiffError with the message ``error`` if no line of
        the output is a commit ID.
        """
        for line in self.run_git(args):
            if m := self._hash_pattern.match(line):
                return m.group(1)
        raise GitBDiffError(error)

    def get_branch_point(self):
        """Get the branch point from the parent repo.

        Find the commit which marks the point of divergence from the
        parent repository.  If there are no changes or this is the
        trunk, the branch point will be the same as the most recent
        commit.

        Raises GitBDiffError if git does not report a commit ID.
        """
        return self._first_hash(
            ["merge-base", self.parent, "HEAD"], "branch point not found"
        )

    def get_latest_commit(self):
        """Get the last commit ID on the branch.

        Raises GitBDiffError if git does not report a commit ID.
        """
        return self._first_hash(
            ["show", "--pretty=%H", "--no-patch"], "current revision not found"
        )

    @property
    def is_branch(self):
        """Whether this is a branch or main."""
        return self.branch != self.primary_branch

    @property
    def has_diverged(self):
        """Whether the branch has diverged from its parent."""
        return self.ancestor != self.current

    def files(self):
        """Iterate over files changed on the branch.

        Yields each non-empty line printed by
        ``git diff --name-only --diff-filter=AMX <ancestor>``.
        """
        diff_args = ["diff", "--name-only", "--diff-filter=AMX", self.ancestor]
        for line in self.run_git(diff_args):
            # Splitting the output on newlines leaves empty strings
            if line != "":
                yield line
class GitInfo(GitBase):
    """
    Holds information about a git repo (currently its branch name)
    """

    def __init__(self, repo=None):
        """Look up and store the current branch name of ``repo``."""
        super().__init__(repo=repo)
        self.branch = self.get_branch_name()

    def is_main(self):
        """
        Report whether the stored branch name is main-like

        The detached head marker is treated as main-like too, because
        no diff can be produced in that state
        """
        trunk_names = (
            "main",
            "stable",
            "trunk",
            "master",
            self.detached_head_reference,
        )
        return self.branch in trunk_names