imageFileConvertion/convert_all.py at main · VirtualFlyBrain/imageFileConvertion · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
#!/usr/bin/env python3
"""
Main batch conversion script for VFB image files.

Given a list of VFB IDs (or a single template ID, in which case the template
itself is converted together with its painted domains), this script determines
the appropriate conversion strategy for each image and dispatches to the
correct converter:

  A. Templates with painted domains → convert_templates
  B. Neurons with full OBJ meshes  → convert_obj_meshes
  C. SWC skeletons without meshes  → convert_swc_to_mesh
  D. NRRD volumes (fallback)       → convert_nrrd

Usage:
  # Convert a specific template with all its painted domains
  python convert_all.py --template VFB_00101567 --output-dir output/ --verbose

  # Convert a list of VFB IDs
  python convert_all.py --ids VFB_jrchjx4q VFB_fw121683 --output-dir output/ --verbose

  # Convert from a file listing VFB IDs (one per line)
  python convert_all.py --ids-file vfb_ids.txt --output-dir output/ --verbose
"""
from __future__ import annotations

import argparse
import json
import os
import sys
import tempfile

import requests


def vfb_image_url(vfb_id: str, template_id: str, filename: str) -> str:
    """Build the VFB data URL of *filename* for an image aligned to a template.

    The ``VFB_`` marker is removed from *vfb_id*; the first four characters of
    what remains and the rest of it become two consecutive path segments,
    followed by *template_id* and *filename*.
    """
    bare_id = vfb_id.replace("VFB_", "")
    return "https://www.virtualflybrain.org/data/VFB/i/{}/{}/{}/{}".format(
        bare_id[:4], bare_id[4:], template_id, filename
    )


def check_url_exists(url: str, timeout: int = 10) -> bool:
    """Return True when a HEAD request to *url* ends with HTTP status 200.

    Redirects are followed. Any ``requests.RequestException`` raised by the
    request is treated as "does not exist" and yields False.
    """
    try:
        status = requests.head(
            url, allow_redirects=True, timeout=timeout
        ).status_code
    except requests.RequestException:
        return False
    return status == 200


def check_obj_has_faces(url: str, timeout: int = 30,
                        max_bytes: int = 65536) -> bool:
    """Download the start of an OBJ file and report whether it defines faces.

    Only a prefix of the file is inspected: data is streamed in 1 KiB chunks
    and reading stops as soon as more than *max_bytes* have been received.
    A file whose first face line lies beyond that prefix is therefore
    reported as having no faces.

    Args:
        url: Location of the OBJ file.
        timeout: Timeout in seconds passed to ``requests.get``.
        max_bytes: Size of the prefix to inspect (default 64 KiB).

    Returns:
        True if any line of the inspected prefix starts with ``"f "``;
        False otherwise, including when the request fails with a
        ``requests.RequestException`` (e.g. a non-2xx status).
    """
    try:
        # The context manager closes the streamed response even though the
        # body is usually abandoned part-way through.
        with requests.get(url, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            chunks = []
            received = 0
            for chunk in resp.iter_content(1024):
                chunks.append(chunk)
                received += len(chunk)
                if received > max_bytes:
                    break
    except requests.RequestException:
        return False

    # The prefix may end in the middle of a multi-byte character, so decoding
    # errors are ignored rather than raised.
    text = b"".join(chunks).decode("utf-8", errors="ignore")
    return any(line.startswith("f ") for line in text.split("\n"))


def classify_image(vfb_id: str, template_id: str, image_info: dict | None = None,
                   verbose: bool = False) -> str:
    """Pick the conversion strategy for one VFB image by probing its files.

    The function checks whether ``volume.swc`` and ``volume_man.obj`` exist
    for the image and, when the OBJ exists, whether it contains face lines.

    Returns:
        ``"obj_mesh"`` when ``volume_man.obj`` exists and has faces,
        ``"swc_to_mesh"`` when that is not the case but ``volume.swc`` exists,
        ``"nrrd"`` otherwise.

    ``image_info`` is accepted but not used by this function.
    """
    swc_available = check_url_exists(
        vfb_image_url(vfb_id, template_id, "volume.swc")
    )

    mesh_url = vfb_image_url(vfb_id, template_id, "volume_man.obj")
    mesh_available = check_url_exists(mesh_url)
    # The OBJ content is only fetched when the file is known to exist.
    mesh_has_faces = mesh_available and check_obj_has_faces(mesh_url)

    if verbose:
        print(f"  [{vfb_id}] SWC={swc_available}, OBJ_man={mesh_available}, OBJ_faces={mesh_has_faces}")

    if mesh_has_faces:
        # A real mesh is available: direct OBJ conversion.
        return "obj_mesh"

    # No usable mesh: build one from the skeleton if present, else fall back
    # to the NRRD volume.
    return "swc_to_mesh" if swc_available else "nrrd"


def _convert_from_download(download_file, url, suffix, convert):
    """Download *url* to a temporary file, run ``convert(path)``, then delete it.

    The temporary file is removed whether the download or the conversion
    succeeds or raises.
    """
    # The handle is closed immediately (delete=False keeps the file on disk);
    # only the reserved path is handed to download_file and convert.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp_path = tmp.name
    try:
        download_file(url, tmp_path)
        convert(tmp_path)
    finally:
        os.unlink(tmp_path)


def process_image(vfb_id: str, template_id: str, category: str,
                  output_dir: str, resolution: list[float],
                  verbose: bool = True):
    """Process a single VFB image using the converter selected by *category*.

    Args:
        vfb_id: ID of the image to convert.
        template_id: ID of the template the image is aligned to.
        category: ``"obj_mesh"``, ``"swc_to_mesh"`` or ``"nrrd"``.
        output_dir: Directory passed to the converters as output location.
        resolution: Passed to ``convert_obj_to_precomputed`` as ``resolution``;
            not used for the ``"nrrd"`` category.
        verbose: Forwarded to the converters.

    Raises:
        ValueError: If *category* is not one of the values listed above.
    """
    if category == "obj_mesh":
        from convert_obj_meshes import convert_obj_to_precomputed, download_file

        _convert_from_download(
            download_file,
            vfb_image_url(vfb_id, template_id, "volume_man.obj"),
            ".obj",
            lambda path: convert_obj_to_precomputed(
                path, output_dir, vfb_id,
                resolution=resolution, verbose=verbose,
            ),
        )

    elif category == "swc_to_mesh":
        from convert_swc_to_mesh import (
            download_file,
            swc_to_obj,
        )
        from convert_obj_meshes import convert_obj_to_precomputed

        # Step 1: Download SWC and generate OBJ. The OBJ is written into
        # output_dir and kept (durable artifact); only the SWC is temporary.
        obj_path = os.path.join(output_dir, vfb_id + "_volume_man.obj")
        _convert_from_download(
            download_file,
            vfb_image_url(vfb_id, template_id, "volume.swc"),
            ".swc",
            lambda path: swc_to_obj(path, obj_path, verbose=verbose),
        )

        # Step 2: Convert the OBJ to precomputed
        convert_obj_to_precomputed(
            obj_path, output_dir, vfb_id,
            resolution=resolution, verbose=verbose,
        )

    elif category == "nrrd":
        from convert_nrrd import convert_nrrd, download_file

        _convert_from_download(
            download_file,
            vfb_image_url(vfb_id, template_id, "volume.nrrd"),
            ".nrrd",
            lambda path: convert_nrrd(
                path, output_dir, vfb_id,
                merge_segments=True, verbose=verbose,
            ),
        )

    else:
        raise ValueError(f"Unknown category: {category}")


def main():
    """Parse the command line and run the batch conversion.

    With ``--template`` the template itself is converted via
    ``convert_templates.convert_template`` and the function returns.
    Otherwise the VFB IDs given by ``--ids`` or read from ``--ids-file`` are
    classified one by one and, unless ``--dry-run`` is set, converted.
    A failure on one image is reported and counted, and processing continues
    with the next image. A summary of counts per category is printed at the
    end.
    """
    parser = argparse.ArgumentParser(
        description="Batch convert VFB image files to Neuroglancer precomputed format"
    )

    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument("--ids", nargs="+",
                             help="VFB image IDs to convert")
    input_group.add_argument("--ids-file",
                             help="File containing VFB IDs (one per line)")
    input_group.add_argument("--template",
                             help="Template ID — converts the template itself with painted domains")

    parser.add_argument("--template-id", default="VFB_00101567",
                        help="Template ID for aligned images (default: JRC2018Unisex)")
    parser.add_argument("--output-dir", required=True,
                        help="Output directory for precomputed datasets")
    parser.add_argument("--resolution", type=float, nargs=3,
                        default=[518.9161, 518.9161, 1000.0],
                        help="Voxel resolution in nm [x y z] (default: JRC2018U)")
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("--dry-run", action="store_true",
                        help="Classify images but don't convert")
    args = parser.parse_args()

    output_dir = os.path.abspath(os.path.expanduser(args.output_dir))
    os.makedirs(output_dir, exist_ok=True)

    # Handle template conversion
    if args.template:
        from convert_templates import convert_template
        if args.verbose:
            print(f"Converting template: {args.template}")
        convert_template(
            template_id=args.template,
            domains={},  # Will use numeric IDs; provide --domains-json to convert_templates.py for labels
            output_dir=output_dir,
            verbose=args.verbose,
        )
        print(f"Done. Template output at: {output_dir}/{args.template}")
        return

    # Collect VFB IDs
    if args.ids:
        vfb_ids = args.ids
    else:
        with open(args.ids_file, encoding="utf-8") as f:
            # Strip before testing for "#" so that indented comment lines are
            # skipped too instead of being treated as IDs.
            stripped_lines = (line.strip() for line in f)
            vfb_ids = [
                entry for entry in stripped_lines
                if entry and not entry.startswith("#")
            ]

    if args.verbose:
        print(f"Processing {len(vfb_ids)} images against template {args.template_id}")

    # Classify and process
    results = {"obj_mesh": 0, "swc_to_mesh": 0, "nrrd": 0, "failed": 0}

    for i, vfb_id in enumerate(vfb_ids, 1):
        print(f"\n[{i}/{len(vfb_ids)}] {vfb_id}")

        try:
            category = classify_image(vfb_id, args.template_id, verbose=args.verbose)
            print(f"  Category: {category}")

            if not args.dry_run:
                process_image(
                    vfb_id, args.template_id, category,
                    output_dir, args.resolution, verbose=args.verbose,
                )
            # Counted only after a successful conversion (or classification
            # in dry-run mode).
            results[category] = results.get(category, 0) + 1

        except Exception as e:
            # Top-level boundary of the batch loop: report the failure and
            # keep going with the remaining images.
            print(f"  ERROR: {e}")
            results["failed"] += 1
            if args.verbose:
                import traceback
                traceback.print_exc()

    # Summary
    print(f"\n{'=' * 60}")
    print("Conversion Summary:")
    print(f"{'=' * 60}")
    print(f"  OBJ mesh (direct):     {results['obj_mesh']}")
    print(f"  SWC → mesh:            {results['swc_to_mesh']}")
    print(f"  NRRD → mesh:           {results['nrrd']}")
    print(f"  Failed:                {results['failed']}")
    print(f"  Total:                 {sum(results.values())}")


# Run the batch conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()