Commit b98b4e1
Deduplicate and curate MIME types database
- Run dedup-mime-types.py to resolve conflicts, preferring image/ types
over other categories, non-x- subtypes over x- variants, and avoiding
application/octet-stream
- Commented-out alternatives remain visible for manual adjustment
- Manual overrides: model/ types for 3D formats (obj, ply, stl, xyz),
image/wmf over windows/metafile, text/javascript for .js,
video/mp4 for .mp4/.m4v, application/xml for .xml
- Added entries: .ijm (text/x-imagej-macro), .r (text/x-r-source),
.ply (model/ply), .obj (model/obj)
- Removed malformed entries (.aff with no MIME type, .aif -> .aff)
Claude performed the deduplication via the following Python script:
#!/usr/bin/env python3
"""
Deduplicate mime-types.txt by keeping the best MIME type for each extension.
Dropped alternatives are commented out in-place so they remain visible.
Preference order (lower score = better):
0 - image/* non-x- subtype (e.g. image/tiff)
1 - other non-x- subtype (e.g. audio/aiff, video/mp4)
2 - image/x-* subtype (e.g. image/x-tiff)
3 - other x- subtype (e.g. audio/x-aiff)
4 - application/octet-stream (generic fallback, avoid)
"""
import sys
from collections import defaultdict
INPUT = "src/main/resources/org/scijava/desktop/mime-types.txt"
def preference(mime: str):
    """Return a sort key ranking *mime*; a lower key is a better choice.

    The key is a ``(score, mime)`` tuple. The score follows the preference
    order in the module docstring:

        0 - image/* with a non-"x-" subtype
        1 - any other type with a non-"x-" subtype
        2 - image/x-* subtype
        3 - any other x-* subtype
        4 - application/octet-stream (generic fallback)

    The second element is the MIME string exactly as passed in, so equal
    scores are ordered by plain string comparison.

    Media type and subtype names are case-insensitive (RFC 2045, section
    5.1), so classification is done on a stripped, lower-cased copy. The
    original string is what is returned in the key.
    """
    # Classify on a normalized copy; never alter the value handed back.
    normalized = mime.strip().lower()
    if normalized == "application/octet-stream":
        return (4, mime)

    # partition() tolerates a missing "/": subtype is then "" and the value
    # falls into the "other, non-x-" bucket.
    category, _, subtype = normalized.partition("/")
    is_image = category == "image"
    if not subtype.startswith("x-"):
        return (0 if is_image else 1, mime)
    return (2 if is_image else 3, mime)
def _dedupe(lines):
    """Pick the best MIME type per extension from an iterable of text lines.

    Returns ``(layout, best, dropped)``:

    * ``layout``  - ordered list of ``("text", line)`` items for comment and
      blank lines and ``("ext", ext)`` items marking where an extension
      first appeared. Replaying it reproduces the file with every comment
      where it was found.
    * ``best``    - dict mapping extension -> winning MIME type.
    * ``dropped`` - dict mapping extension -> losing MIME types, in the
      order they were encountered.

    Progress and skipped lines are reported on stderr.
    """
    layout = []
    best = {}
    dropped = defaultdict(list)

    for raw in lines:
        stripped = raw.rstrip("\n")

        # Comment and blank lines keep their position. Hoisting them to the
        # top would detach previously commented-out alternatives from their
        # extension whenever the script is re-run on its own output.
        if stripped.startswith("#") or not stripped:
            layout.append(("text", stripped))
            continue

        parts = stripped.split("\t", 1)
        if len(parts) < 2:
            print(f" skipping (no MIME type): {stripped!r}", file=sys.stderr)
            continue

        ext, mime = parts
        if ext not in best:
            best[ext] = mime
            layout.append(("ext", ext))
        elif mime == best[ext] or mime in dropped[ext]:
            # An exact repeat carries no information; recording it would
            # emit a commented-out copy of a line that is already present.
            continue
        elif preference(mime) < preference(best[ext]):
            print(f" {ext}: preferring {mime!r} over {best[ext]!r}", file=sys.stderr)
            dropped[ext].append(best[ext])
            best[ext] = mime
        else:
            print(f" {ext}: keeping {best[ext]!r}, dropping {mime!r}", file=sys.stderr)
            dropped[ext].append(mime)

    return layout, best, dropped


def _write(path, layout, best, dropped):
    """Write the deduplicated table to *path*, replaying *layout* in order."""
    with open(path, "w", encoding="utf-8") as f:
        for kind, value in layout:
            if kind == "text":
                f.write(value + "\n")
                continue
            # Winner first, then its losing alternatives commented out
            # directly beneath it so they stay visible for manual review.
            f.write(f"{value}\t{best[value]}\n")
            for alt in dropped.get(value, ()):
                f.write(f"#{value}\t{alt}\n")


def main(path=None):
    """Deduplicate the MIME table at *path* (default: ``INPUT``) in place.

    The file is read completely before it is reopened for writing. The
    encoding is fixed to UTF-8 so the result does not depend on the
    platform's default locale.
    """
    target = INPUT if path is None else path
    with open(target, encoding="utf-8") as f:
        layout, best, dropped = _dedupe(f)
    _write(target, layout, best, dropped)
    print(f"\n{len(best)} unique extensions written.", file=sys.stderr)


if __name__ == "__main__":
    main()
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 60582a3 commit b98b4e1
1 file changed
+349
-333
lines changed
0 commit comments