-
Notifications
You must be signed in to change notification settings - Fork 406
146 lines (126 loc) · 4.88 KB
/
dvc-diff.yml
File metadata and controls
146 lines (126 loc) · 4.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Report changes in test images
#
# This workflow checks for image diffs in a pull request and adds a GitHub
# comment showing the diff.
#
# It is triggered in a PR when any *.dvc files have been added, modified,
# or deleted. A GitHub comment will be published that contains a summary table
# of the images that have changed along with a visual report.
#
# Workflow title.
name: DVC image diff

# Run only for pull requests whose changes touch a *.dvc file, at any depth.
on:
  pull_request:
    paths: ["**/*.dvc"]

# Token scopes: read the repository contents, and write access to pull
# requests (the generated report is posted as a pull request comment).
permissions:
  contents: read
  pull-requests: write
jobs:
  # Single job: builds a markdown report of the PostScript baseline images that
  # differ between master and the pull request head, then publishes it as a
  # pull request comment.
  dvc-diff:
    name: DVC image diff
    runs-on: ubuntu-latest
    defaults:
      run:
        # Custom shell template: a login shell started without `-e`, so a
        # failing command does not abort a script and each step's status is the
        # exit code of its last command.
        # NOTE(review): the login shell is presumably what activates the
        # micromamba environment created further down - confirm.
        shell: bash -l {0}

    steps:
      - name: Checkout
        uses: actions/checkout@v6.0.2

      - name: Setup data version control (DVC)
        uses: iterative/setup-dvc@v2

      # Errors are tolerated here (continue-on-error); when this step fails,
      # the next step restores the data from a GitHub artifact instead.
      - name: Pull baseline image data from dvc remote
        id: dvc-pull
        continue-on-error: true
        env:
          DAGSHUB_TOKEN: ${{ secrets.DAGSHUB_TOKEN }}
        run: |
          # --local keeps the token-bearing URL in the untracked local DVC config.
          dvc remote modify origin url https://${DAGSHUB_TOKEN}@dagshub.com/GenericMappingTools/gmt.dvc --local
          # Try at most twice; the second pull only runs if the first one failed.
          dvc pull --remote origin --no-run-cache || dvc pull --remote origin --no-run-cache

      # Fallback, used only when the pull step above failed: download the
      # artifact named "dvc-cache" into the DVC cache directory and check the
      # tracked files out of that cache.
      - name: Download DVC cache from GitHub Artifacts and restore baseline image data
        if: steps.dvc-pull.outcome == 'failure'
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          gh run download -n dvc-cache -D .dvc/cache/
          dvc checkout --force

      - name: Setup continuous machine learning (CML)
        uses: iterative/setup-cml@v3

      # Creates the "gmt" environment from conda-forge with the gmt package,
      # which provides the `gmt psconvert` command used by the report step.
      - name: Setup Micromamba
        uses: mamba-org/setup-micromamba@v3.0.0
        with:
          environment-name: gmt
          condarc: |
            channels:
              - conda-forge
              - nodefaults
          cache-downloads: true
          cache-environment: true
          create-args: >-
            gmt

      # Produce the markdown diff report, which should look like:
      # ## Summary of changed images
      #
      # This is an auto-generated report of images that have changed on the DVC remote
      #
      # | Status   | Path                                |
      # |----------|-------------------------------------|
      # | added    | test/baseline/test_image.png        |
      # | deleted  | test/baseline/test_image2.png       |
      # | modified | test/baseline/test_image3.png       |
      #
      # ## Image diff(s)
      #
      # <details>
      # ...
      # </details>
      #
      # Report last updated at commit abcdef
      - name: Generate the image diff report
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          DAGSHUB_TOKEN: ${{ secrets.DAGSHUB_TOKEN }}
        run: |
          echo -e "## Summary of changed images\n" > report.md
          echo -e "This is an auto-generated report of images that have changed on the DVC remote\n" >> report.md

          # Fetch the master branch
          git fetch origin master
          dvc diff --md master HEAD >> report.md

          # Get just the filename of the added and modified image from the report.
          # Table rows start at line 7 of report.md (four heading lines plus the
          # table header and separator) and split into exactly five fields:
          # "|", status, "|", path, "|". The dot is escaped so that only a
          # literal ".ps" extension matches.
          awk 'NF==5 && NR>=7 && $2=="added" && $4 ~ /\.ps$/ {print $4}' report.md > added_files.txt
          awk 'NF==5 && NR>=7 && $2=="modified" && $4 ~ /\.ps$/ {print $4}' report.md > modified_files.txt

          # Backup new images in a different directory
          mkdir -p newbaseline/test && cp -r test/baseline newbaseline/test/
          mkdir -p newbaseline/doc/examples && cp -r doc/examples/images newbaseline/doc/examples/
          mkdir -p newbaseline/doc/scripts && cp -r doc/scripts/images newbaseline/doc/scripts/
          # Without globstar, bash treats `**` like `*` and would only clean
          # one directory level below test/baseline.
          shopt -s globstar
          rm -r test/baseline/**/*.ps doc/examples/images/*.ps doc/scripts/images/*.ps

          # Restore images for the master branch, using the DVC remote when available.
          # The pull is attempted at most twice; if both attempts fail, fall back
          # to whatever is already in the local DVC cache.
          git checkout master
          if ! { dvc pull --remote origin --no-run-cache || dvc pull --remote origin --no-run-cache; }; then
            dvc checkout --force
          fi

          # Append each image to the markdown report
          echo -e "## Image diff(s)\n" >> report.md
          echo -e "<details>\n" >> report.md

          # Added images
          echo -e "### Added images\n" >> report.md
          while IFS= read -r line; do
            # -Tg requests PNG output; paths are quoted so they are passed as
            # single arguments.
            # NOTE(review): an added image has no counterpart on master, so the
            # first path presumably does not exist here - confirm psconvert
            # still converts the newbaseline copy.
            gmt psconvert -A -P -Tg "$line" "newbaseline/$line"
            # The inner quotes are escaped so they reach the report and form a
            # valid markdown image title.
            echo -e "- ![](newbaseline/${line%.*}.png \"${line%.*}.png\") \n" >> report.md
            echo -e "" >> report.md
          done < added_files.txt

          # Modified images
          echo -e "### Modified images\n" >> report.md
          echo -e "| Path | Old | New |" >> report.md
          echo -e "|---|---|---|" >> report.md
          while IFS= read -r line; do
            # Convert the master version and the backed-up pull request version.
            gmt psconvert -A -P -Tg "$line" "newbaseline/$line"
            echo -e "| $line |  |  |" >> report.md
          done < modified_files.txt

          echo -e "</details>\n" >> report.md

          # Mention git commit SHA in the report
          echo -e "Report last updated at commit ${{ github.event.pull_request.head.sha }}" >> report.md

          # create/update PR comment
          cml comment update report.md