-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathformat_xml.py
More file actions
70 lines (58 loc) · 2.01 KB
/
format_xml.py
File metadata and controls
70 lines (58 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from pathlib import Path
from typing import List
import xml.etree.ElementTree as ET
import argparse
def get_node_key(node, attr=None):
    """Build the string used to order *node* among its siblings.

    When ``attr`` is ``None`` the key is the tag directly followed by all
    attribute values, taken in attribute-name order and joined with ``:``
    (there is no separator between the tag and the first value).
    When ``attr`` is given, the key is ``tag:value`` if the node carries
    that attribute and just the tag otherwise.
    """
    tag = str(node.tag)
    if attr is not None:
        if attr in node.attrib:
            return tag + ":" + str(node.get(attr))
        return tag
    # No specific attribute requested: fold in every value, ordered by name.
    values = [node.get(name) for name in sorted(node.attrib)]
    return tag + ":".join(values)
def sort_children(node, attr=None):
    """Order *node*'s attributes and children in place, then recurse.

    Attributes are reordered by name. Children are reordered by the key
    that ``get_node_key`` produces for ``attr`` (all attributes when
    ``attr`` is ``None``). Returns ``None``.
    """
    if not isinstance(node.tag, str):
        # A non-string tag marks a comment or data node rather than an
        # element, so there is nothing to order.
        return

    # Rebuild the mapping in key order; this relies on dicts preserving
    # insertion order (Python 3.7+).
    node.attrib = {name: node.attrib[name] for name in sorted(node.attrib)}

    def sibling_key(child):
        return get_node_key(child, attr)

    ordered = list(node)
    ordered.sort(key=sibling_key)
    node[:] = ordered

    # Descend into each child, now in its sorted position.
    for child in ordered:
        sort_children(child, attr)
def sort(unsorted_file, sorted_file, attr=None):
    """Parse ``unsorted_file``, sort its elements and write ``sorted_file``.

    Children are ordered by ``attr`` (or by all attributes when ``attr``
    is ``None``), the tree is re-indented with two spaces, and the result
    is written as UTF-8 text. If the input cannot be parsed, the error is
    printed and nothing is written. Returns ``None`` in every case.
    """
    try:
        document = ET.parse(unsorted_file)
    except Exception as exc:
        # Best effort: report the failure and leave the output untouched.
        print("Error parsing: " + str(exc))
    else:
        top = document.getroot()
        sort_children(top, attr)
        ET.indent(document, space="  ", level=0)
        # Serialise before opening the output so that a failure here cannot
        # truncate the destination (which may be the input file itself).
        serialized = ET.tostring(top, encoding="unicode")
        with open(sorted_file, "w", encoding="utf-8") as sink:
            sink.write(serialized)
def main(argv=None):
    """Command-line entry point: sort every given XML file in place.

    ``argv`` is the list of command-line arguments to parse. ``None`` (the
    default) makes argparse read them from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Sort XML elements alphabetically for better diffing.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "inputs",
        metavar="INPUT",
        nargs="+",
        type=str,
        help="XML files to process.",
    )
    args = parser.parse_args(argv)

    paths: List[Path] = [Path(name).resolve() for name in args.inputs]
    for path in paths:
        print(f"Processing {path}")
        # Source and destination are the same path: the file is rewritten
        # in place.
        sort(path, path)
    print("Done")


if __name__ == "__main__":
    main()