1+ #!/usr/bin/env python3
2+ """
3+ Standalone CLI for TEI2LossyJSON converter.
4+
5+ This script provides a command-line interface for converting TEI XML files to JSON format
6+ using the TEI2LossyJSONConverter.
7+ """
8+ import argparse
9+ import json
10+ import logging
11+ import sys
12+ from pathlib import Path
13+
14+ from .TEI2LossyJSON import TEI2LossyJSONConverter
15+
16+
def setup_logging(verbose: bool = False):
    """Configure the root logger for the CLI.

    Args:
        verbose: When true the threshold is ``logging.INFO``; otherwise it is
            ``logging.WARNING``.
    """
    if verbose:
        threshold = logging.INFO
    else:
        threshold = logging.WARNING

    # Timestamped "<time> - <LEVEL> - <message>" records.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        level=threshold,
    )
25+
26+
def convert_single_file(input_file: Path, output_file: Path, verbose: bool = False) -> bool:
    """Convert a single TEI file to JSON format.

    Args:
        input_file: Path of the TEI XML file handed to the converter.
        output_file: Path of the JSON file to write. Missing parent
            directories are created.
        verbose: When true, progress messages are logged at INFO level.

    Returns:
        ``True`` when the JSON file was written. ``False`` when the converter
        returned ``None`` or any exception was raised; the failure is logged
        at ERROR level and never propagated to the caller.
    """
    try:
        if verbose:
            logging.info(f"Converting {input_file} to {output_file} ")

        converter = TEI2LossyJSONConverter()
        result = converter.convert_tei_file(input_file, stream=False)

        if result is None:
            logging.error(f"Failed to convert {input_file} : TEI file is not well-formed or empty")
            return False

        # Serialize completely BEFORE touching the filesystem: json.dump()
        # streams into an already-truncated file, so a serialization error
        # halfway through would leave a partial/empty JSON file behind.
        payload = json.dumps(result, indent=2, ensure_ascii=False)

        # Ensure output directory exists
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # Write JSON output
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(payload)

        if verbose:
            logging.info(f"Successfully converted {input_file} to {output_file} ")

        return True

    except Exception as e:
        # CLI boundary: report the problem and signal failure via the return
        # value instead of crashing with a traceback.
        logging.error(f"Error converting {input_file} : {str(e)} ")
        return False
55+
56+
def main(argv=None):
    """Main CLI entry point.

    Parses the command line, configures logging, validates the input path and
    converts the TEI file, writing the JSON either to ``--output`` or to
    stdout when no output path is given.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the original behaviour.

    Raises:
        SystemExit: With code 1 when the input path does not exist, is not a
            file, or the conversion fails; with code 0 or 1 after a
            conversion to ``--output``; and with argparse's own codes for
            ``--help`` or invalid arguments. A successful conversion to
            stdout returns normally.
    """
    parser = argparse.ArgumentParser(
        description="Convert TEI XML files to JSON format using TEI2LossyJSON converter",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Convert a single TEI file
python -m grobid_client.format.TEI2LossyJSON --input input.tei.xml --output output.json

# Convert with verbose logging
python -m grobid_client.format.TEI2LossyJSON --input input.tei.xml --output output.json --verbose

# Convert and output to stdout
python -m grobid_client.format.TEI2LossyJSON --input input.tei.xml
"""
    )

    parser.add_argument(
        "--input", "-i",
        type=Path,
        required=True,
        help="Input TEI XML file to convert"
    )

    parser.add_argument(
        "--output", "-o",
        type=Path,
        help="Output JSON file (if not specified, prints to stdout)"
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging"
    )

    # Passing argv through lets callers (and tests) drive the CLI
    # programmatically; None falls back to sys.argv[1:].
    args = parser.parse_args(argv)

    # Setup logging
    setup_logging(args.verbose)

    # Validate input file
    if not args.input.exists():
        logging.error(f"Input file does not exist: {args.input} ")
        sys.exit(1)

    if not args.input.is_file():
        logging.error(f"Input path is not a file: {args.input} ")
        sys.exit(1)

    # Convert the file
    if args.output:
        success = convert_single_file(args.input, args.output, args.verbose)
        sys.exit(0 if success else 1)
    else:
        # Output to stdout
        try:
            converter = TEI2LossyJSONConverter()
            result = converter.convert_tei_file(args.input, stream=False)

            if result is None:
                logging.error(f"Failed to convert {args.input} : TEI file is not well-formed or empty")
                # SystemExit is not an Exception subclass, so the handler
                # below does not intercept this exit.
                sys.exit(1)

            # Print JSON to stdout
            print(json.dumps(result, indent=2, ensure_ascii=False))

        except Exception as e:
            logging.error(f"Error converting {args.input} : {str(e)} ")
            sys.exit(1)
128+
129+
# Run the CLI only when this module is executed directly, never on import.
if __name__ == "__main__":
    main()
0 commit comments